Spaces:
Running
Running
Donald Winkelman
committed on
Commit
·
e9afeb8
1
Parent(s):
ec77913
Updating Side-By-Side Space
Browse files
app.py
CHANGED
@@ -1,247 +1,31 @@
|
|
1 |
import gradio as gr
|
2 |
-
import os
|
3 |
import time
|
4 |
-
import sys
|
5 |
from datetime import datetime
|
6 |
|
7 |
-
# Try to import llama_cpp
|
8 |
-
try:
|
9 |
-
from llama_cpp import Llama
|
10 |
-
|
11 |
-
LLAMA_CPP_AVAILABLE = True
|
12 |
-
print("llama_cpp is available!")
|
13 |
-
except ImportError:
|
14 |
-
LLAMA_CPP_AVAILABLE = False
|
15 |
-
print("llama_cpp is not available. Running in fallback mode.")
|
16 |
-
|
17 |
-
# Define the GGUF model paths
|
18 |
-
BASE_MODEL_PATH = "unsloth/Qwen3-4B-GGUF/Qwen3-4B-Q4_K_M.gguf"
|
19 |
-
NOVEL_MODEL_PATH = "mradermacher/Clinician-Note-2.0a-i1-GGUF/Clinician-Note-2.0a.i1-Q4_K_M.gguf"
|
20 |
-
|
21 |
-
# Initialize models
|
22 |
-
base_model = None
|
23 |
-
novel_model = None
|
24 |
-
|
25 |
-
|
26 |
-
def load_models(progress=None):
|
27 |
-
"""Load the llama.cpp models"""
|
28 |
-
global base_model, novel_model
|
29 |
-
|
30 |
-
if not LLAMA_CPP_AVAILABLE:
|
31 |
-
print("llama_cpp not available, cannot load models")
|
32 |
-
return False
|
33 |
-
|
34 |
-
try:
|
35 |
-
# Load base model
|
36 |
-
if progress:
|
37 |
-
progress(0.2, desc="Loading base model... This may take a few minutes")
|
38 |
-
|
39 |
-
print(f"Loading base model from {BASE_MODEL_PATH}")
|
40 |
-
base_model = Llama(
|
41 |
-
model_path=BASE_MODEL_PATH,
|
42 |
-
n_ctx=2048, # Context window size
|
43 |
-
n_threads=4 # Number of CPU threads to use
|
44 |
-
)
|
45 |
-
|
46 |
-
# Load novel model
|
47 |
-
if progress:
|
48 |
-
progress(0.7, desc="Loading novel model... This may take a few minutes")
|
49 |
-
|
50 |
-
print(f"Loading novel model from {NOVEL_MODEL_PATH}")
|
51 |
-
novel_model = Llama(
|
52 |
-
model_path=NOVEL_MODEL_PATH,
|
53 |
-
n_ctx=2048, # Context window size
|
54 |
-
n_threads=4 # Number of CPU threads to use
|
55 |
-
)
|
56 |
-
|
57 |
-
if progress:
|
58 |
-
progress(1.0, desc="Models loaded successfully!")
|
59 |
-
|
60 |
-
print("Models loaded successfully!")
|
61 |
-
return True
|
62 |
-
|
63 |
-
except Exception as e:
|
64 |
-
print(f"Error loading models: {str(e)}")
|
65 |
-
if progress:
|
66 |
-
progress(1.0, desc=f"Error loading models: {str(e)}")
|
67 |
-
return False
|
68 |
-
|
69 |
-
|
70 |
-
def format_prompt_for_llama(prompt_template, transcript):
|
71 |
-
"""Format the prompt for llama.cpp models"""
|
72 |
-
if not prompt_template.strip():
|
73 |
-
# Use default prompt header if not provided
|
74 |
-
prompt_template = """
|
75 |
-
**Role:** You are an AI assistant specialized in generating clinical SOAP notes.
|
76 |
-
|
77 |
-
**Task:** Generate a concise, accurate, and clinically relevant SOAP note based **STRICTLY AND SOLELY** on the provided doctor-patient interaction transcript.
|
78 |
-
|
79 |
-
**CRITICAL INSTRUCTIONS:**
|
80 |
-
|
81 |
-
1. **Strict Transcript Adherence:** Generate the SOAP note using **ONLY** information **explicitly stated** within the provided transcript.
|
82 |
-
2. **NO Assumptions or External Knowledge:** **DO NOT** infer information, add details not mentioned (even if clinically likely), make assumptions, or use external medical knowledge. Adherence to the transcript is paramount.
|
83 |
-
3. **Standard SOAP Structure:** Organize the output clearly into the following sections using **EXACTLY** these headings:
|
84 |
-
* **S β Subjective**
|
85 |
-
* **O β Objective**
|
86 |
-
* **A β Assessment**
|
87 |
-
* **P β Plan**
|
88 |
-
4. **NO Extraneous Text:** The output must contain **ONLY** the four section headings (S, O, A, P) and the corresponding content derived *directly* from the transcript. **DO NOT** include introductory sentences (e.g., "Here is the SOAP note:"), concluding remarks, disclaimers, notes about the generation process, metadata, or *any* other text before, between, or after the S/O/A/P sections.
|
89 |
-
"""
|
90 |
-
|
91 |
-
# Simple chat template format for llama.cpp
|
92 |
-
full_prompt = f"""<|im_start|>system
|
93 |
-
You are a medical assistant specialized in creating SOAP notes from doctor-patient transcripts.
|
94 |
-
<|im_end|>
|
95 |
-
<|im_start|>user
|
96 |
-
{prompt_template}
|
97 |
-
|
98 |
-
Transcript: {transcript}
|
99 |
-
<|im_end|>
|
100 |
-
<|im_start|>assistant
|
101 |
-
"""
|
102 |
-
|
103 |
-
return full_prompt
|
104 |
-
|
105 |
|
106 |
def generate_soap_notes(transcript, prompt_template, temperature=0.3, top_p=0.9, top_k=20, progress=gr.Progress()):
|
107 |
-
"""
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
"Failed to load models. Running in demo mode.",
|
127 |
-
generate_fallback_soap_note("base"),
|
128 |
-
"Failed to load models. Running in demo mode.",
|
129 |
-
generate_fallback_soap_note("novel")
|
130 |
-
)
|
131 |
-
|
132 |
-
# Format prompt
|
133 |
-
formatted_prompt = format_prompt_for_llama(prompt_template, transcript)
|
134 |
-
|
135 |
-
try:
|
136 |
-
# Generate with base model
|
137 |
-
progress(0.4, desc="Generating with base model...")
|
138 |
-
|
139 |
-
base_output = base_model(
|
140 |
-
formatted_prompt,
|
141 |
-
max_tokens=1024,
|
142 |
-
temperature=temperature,
|
143 |
-
top_p=top_p,
|
144 |
-
top_k=int(top_k),
|
145 |
-
stop=["<|im_end|>", "<|im_start|>"]
|
146 |
-
)
|
147 |
-
|
148 |
-
base_text = base_output["choices"][0]["text"] if "choices" in base_output else ""
|
149 |
-
base_thinking = f"Generated using llama.cpp at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
150 |
-
base_content = format_soap_note(base_text)
|
151 |
-
|
152 |
-
# Generate with novel model
|
153 |
-
progress(0.8, desc="Generating with novel model...")
|
154 |
-
|
155 |
-
novel_output = novel_model(
|
156 |
-
formatted_prompt,
|
157 |
-
max_tokens=1024,
|
158 |
-
temperature=temperature,
|
159 |
-
top_p=top_p,
|
160 |
-
top_k=int(top_k),
|
161 |
-
stop=["<|im_end|>", "<|im_start|>"]
|
162 |
-
)
|
163 |
-
|
164 |
-
novel_text = novel_output["choices"][0]["text"] if "choices" in novel_output else ""
|
165 |
-
novel_thinking = f"Generated using llama.cpp at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
166 |
-
novel_content = format_soap_note(novel_text)
|
167 |
-
|
168 |
-
progress(1.0, desc="Done!")
|
169 |
-
|
170 |
-
return base_thinking, base_content, novel_thinking, novel_content
|
171 |
-
|
172 |
-
except Exception as e:
|
173 |
-
error_msg = f"Error generating SOAP notes: {str(e)}"
|
174 |
-
print(error_msg)
|
175 |
-
progress(1.0, desc=error_msg)
|
176 |
-
|
177 |
-
return (
|
178 |
-
error_msg,
|
179 |
-
generate_fallback_soap_note("base"),
|
180 |
-
error_msg,
|
181 |
-
generate_fallback_soap_note("novel")
|
182 |
-
)
|
183 |
-
|
184 |
-
|
185 |
-
def format_soap_note(text):
|
186 |
-
"""Format the output to ensure it follows SOAP structure"""
|
187 |
-
# If the text is empty or very short, return a structured fallback
|
188 |
-
if not text or len(text) < 50:
|
189 |
-
return generate_fallback_soap_note("base")
|
190 |
-
|
191 |
-
# Check if the text already has SOAP sections
|
192 |
-
if "**S β" in text or "S β" in text or "**S -" in text:
|
193 |
-
# Text already seems structured, return as is with minor cleaning
|
194 |
-
# Remove any text before the first section
|
195 |
-
for section_start in ["**S β", "S β", "**S -"]:
|
196 |
-
if section_start in text:
|
197 |
-
start_idx = text.find(section_start)
|
198 |
-
if start_idx > 0:
|
199 |
-
text = text[start_idx:]
|
200 |
-
break
|
201 |
-
|
202 |
-
return text.strip()
|
203 |
-
|
204 |
-
# If no structure found, extract content and format it manually
|
205 |
-
lines = text.strip().split('\n')
|
206 |
-
formatted_text = ""
|
207 |
-
|
208 |
-
# Add structure if it's missing
|
209 |
-
if "Subjective" in text or "SUBJECTIVE" in text:
|
210 |
-
formatted_text += "**S β Subjective**\n"
|
211 |
-
elif not any(s in text.upper() for s in ["S β", "S:", "SUBJECTIVE"]):
|
212 |
-
formatted_text += "**S β Subjective**\n"
|
213 |
-
|
214 |
-
if "Objective" in text or "OBJECTIVE" in text:
|
215 |
-
formatted_text += "\n**O β Objective**\n"
|
216 |
-
elif not any(s in text.upper() for s in ["O β", "O:", "OBJECTIVE"]):
|
217 |
-
formatted_text += "\n**O β Objective**\n"
|
218 |
-
|
219 |
-
if "Assessment" in text or "ASSESSMENT" in text:
|
220 |
-
formatted_text += "\n**A β Assessment**\n"
|
221 |
-
elif not any(s in text.upper() for s in ["A β", "A:", "ASSESSMENT"]):
|
222 |
-
formatted_text += "\n**A β Assessment**\n"
|
223 |
-
|
224 |
-
if "Plan" in text or "PLAN" in text:
|
225 |
-
formatted_text += "\n**P β Plan**\n"
|
226 |
-
elif not any(s in text.upper() for s in ["P β", "P:", "PLAN"]):
|
227 |
-
formatted_text += "\n**P β Plan**\n"
|
228 |
-
|
229 |
-
# If we had to add structure, the original text was not properly formatted
|
230 |
-
# In this case, return a fallback
|
231 |
-
if formatted_text and formatted_text != text:
|
232 |
-
return generate_fallback_soap_note("base")
|
233 |
-
|
234 |
-
return text.strip()
|
235 |
-
|
236 |
-
|
237 |
-
def generate_fallback_soap_note(model_type):
|
238 |
-
"""Generate a fallback SOAP note when model generation fails"""
|
239 |
-
if model_type == "base":
|
240 |
-
return """**S β Subjective**
|
241 |
-
Patient complains of migraine for 10 hours, described as severe and unresponsive to medication. Reports experiencing migraines about once a month, sometimes more. Current migraine started with blurry vision and pain in right eye. Reports photophobia, phonophobia, and nausea. Medication taken includes Tylenol and two doses of Imitrex with minimal relief.
|
242 |
|
243 |
**O β Objective**
|
244 |
-
Lungs clear bilaterally. Heart sounds normal
|
245 |
|
246 |
**A β Assessment**
|
247 |
Migraine with aura, unresponsive to Imitrex.
|
@@ -251,46 +35,43 @@ Migraine with aura, unresponsive to Imitrex.
|
|
251 |
2. Prescription for Rizatriptan
|
252 |
3. Recommendation to maintain migraine diary to identify triggers
|
253 |
4. Follow up with primary care physician"""
|
254 |
-
|
255 |
-
|
256 |
-
|
|
|
257 |
|
258 |
**O β Objective**
|
259 |
-
Lungs clear bilaterally. Heart sounds normal without murmurs, rubs, or gallops. Extraocular movements intact. Pupils equal, round, reactive to light and accommodation. No sinus tenderness. Normal lymph nodes. No tongue deviation.
|
260 |
|
261 |
**A β Assessment**
|
262 |
-
Migraine with aura, unresponsive to current medication (Imitrex).
|
263 |
|
264 |
**P β Plan**
|
265 |
1. Trial of new triptan medication (Rizatriptan) and oxygen therapy
|
266 |
-
2. Prescription for Rizatriptan
|
267 |
3. Recommendation to maintain migraine diary to identify triggers
|
268 |
-
4. Follow up with primary care physician
|
|
|
|
|
|
|
|
|
269 |
|
270 |
|
271 |
# Create Gradio interface
|
272 |
with gr.Blocks() as demo:
|
273 |
gr.Markdown("# Clinical SOAP Note Generator")
|
|
|
|
|
274 |
|
275 |
-
|
276 |
-
gr.Markdown("""
|
277 |
-
## ⚠️ Important: llama-cpp-python Not Installed
|
278 |
|
279 |
-
|
|
|
280 |
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
The interface below will operate in demo mode only.
|
286 |
-
""")
|
287 |
-
else:
|
288 |
-
gr.Markdown("""
|
289 |
-
## GGUF Model-based SOAP Note Generator
|
290 |
-
|
291 |
-
This app uses lightweight GGUF models via llama.cpp to generate SOAP notes from doctor-patient transcripts.
|
292 |
-
Models will be loaded when you first generate a note.
|
293 |
-
""")
|
294 |
|
295 |
with gr.Row():
|
296 |
with gr.Column():
|
@@ -345,13 +126,13 @@ with gr.Blocks() as demo:
|
|
345 |
generate_btn = gr.Button("Generate SOAP Notes", variant="primary")
|
346 |
|
347 |
with gr.Column():
|
348 |
-
with gr.Tab("
|
349 |
base_thinking = gr.Textbox(label="Model Process", lines=3, interactive=False)
|
350 |
-
base_content = gr.Textbox(label="
|
351 |
|
352 |
-
with gr.Tab("
|
353 |
novel_thinking = gr.Textbox(label="Model Process", lines=3, interactive=False)
|
354 |
-
novel_content = gr.Textbox(label="
|
355 |
|
356 |
# Connect the generate button
|
357 |
generate_btn.click(
|
@@ -524,5 +305,65 @@ Doctor: Sounds like a good plan. All right Lisa, take care and feel better.
|
|
524 |
inputs=[transcript]
|
525 |
)
|
526 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
527 |
if __name__ == "__main__":
|
528 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
|
|
2 |
import time
|
|
|
3 |
from datetime import datetime
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
def generate_soap_notes(transcript, prompt_template, temperature=0.3, top_p=0.9, top_k=20, progress=gr.Progress()):
|
7 |
+
"""
|
8 |
+
Demo function that simulates SOAP note generation without requiring any models.
|
9 |
+
This version is guaranteed to work in any environment.
|
10 |
+
"""
|
11 |
+
# Simulate processing time with progress updates
|
12 |
+
for i in range(10):
|
13 |
+
progress(i / 10, desc=f"Processing... {i * 10}%")
|
14 |
+
time.sleep(0.2) # Short delays for demo purposes
|
15 |
+
|
16 |
+
# Timestamp for the mock generation
|
17 |
+
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
18 |
+
|
19 |
+
# Mock outputs - these are pre-written SOAP notes
|
20 |
+
base_thinking = f"Demo mode - no actual model inference performed (timestamp: {timestamp})"
|
21 |
+
novel_thinking = f"Demo mode - no actual model inference performed (timestamp: {timestamp})"
|
22 |
+
|
23 |
+
# Generate basic SOAP note
|
24 |
+
base_content = """**S β Subjective**
|
25 |
+
Patient reports migraine for 10 hours with blurry vision and pain in right eye. Reports taking Tylenol and two doses of Imitrex with minimal relief. Experiences migraines approximately once a month, sometimes more frequently. Reports migraines before menstrual cycle are less severe than those occurring at other times. Describes photophobia, phonophobia, movement sensitivity, and nausea.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
**O β Objective**
|
28 |
+
Lungs clear bilaterally. Heart sounds normal without murmurs, rubs, or gallops. Pupils equal, round, reactive to light and accommodation. No sinus tenderness. Normal lymph nodes. No tongue deviation. Normal movement and strength, symmetrical. Normal neurological exam.
|
29 |
|
30 |
**A β Assessment**
|
31 |
Migraine with aura, unresponsive to Imitrex.
|
|
|
35 |
2. Prescription for Rizatriptan
|
36 |
3. Recommendation to maintain migraine diary to identify triggers
|
37 |
4. Follow up with primary care physician"""
|
38 |
+
|
39 |
+
# Generate detailed SOAP note
|
40 |
+
novel_content = """**S β Subjective**
|
41 |
+
Patient reports migraine headache for approximately 10 hours, describing it as more severe and longer-lasting than usual episodes. Patient states it began with blurry vision and pain in right eye, with pain progressively worsening. Reports taking Tylenol and two doses of Imitrex with minimal relief. States migraines occur about once a month, sometimes more frequently, with menstrual migraines being less severe than those occurring at other times. Reports photophobia, phonophobia, movement sensitivity, and nausea without vomiting. Patient describes that being still, lying down in the dark, and quiet environment provides some relief but does not eliminate pain. Reports seeing stars around lights and flashes behind closed eyes. Denies loss of consciousness, chest pain, shortness of breath, numbness, or weakness in extremities.
|
42 |
|
43 |
**O β Objective**
|
44 |
+
Lungs clear bilaterally. Heart sounds normal without murmurs, rubs, or gallops. Extraocular movements intact with patient following finger appropriately. Pupils equal, round, reactive to light and accommodation. No sinus tenderness on palpation. Normal lymph nodes on neck examination. No tongue deviation. Balance test normal with eyes closed and arms extended. Normal strength and movement in upper and lower extremities, symmetrical. Patient reports previous CT/MRI from 5 years ago was normal.
|
45 |
|
46 |
**A β Assessment**
|
47 |
+
Migraine with aura, unresponsive to current medication (Imitrex). Patient describes typical symptoms but with extended duration.
|
48 |
|
49 |
**P β Plan**
|
50 |
1. Trial of new triptan medication (Rizatriptan) and oxygen therapy
|
51 |
+
2. Prescription for Rizatriptan with verified pharmacy information
|
52 |
3. Recommendation to maintain migraine diary to identify triggers
|
53 |
+
4. Follow up with primary care physician
|
54 |
+
5. Education on preventative approaches versus treatment of acute episodes"""
|
55 |
+
|
56 |
+
progress(1.0, desc="Done!")
|
57 |
+
return base_thinking, base_content, novel_thinking, novel_content
|
58 |
|
59 |
|
60 |
# Create Gradio interface
|
61 |
with gr.Blocks() as demo:
|
62 |
gr.Markdown("# Clinical SOAP Note Generator")
|
63 |
+
gr.Markdown("""
|
64 |
+
This app demonstrates the generation of SOAP notes from doctor-patient transcripts.
|
65 |
|
66 |
+
## ⚠️ Note: Currently Running in Demo Mode
|
|
|
|
|
67 |
|
68 |
+
This is a demonstration that shows example outputs without requiring any AI models.
|
69 |
+
For a production version, you would use one of these approaches:
|
70 |
|
71 |
+
1. Use the Hugging Face Inference API instead of local models
|
72 |
+
2. Use pre-built llama-cpp-python wheels
|
73 |
+
3. Use smaller models that fit within memory constraints
|
74 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
with gr.Row():
|
77 |
with gr.Column():
|
|
|
126 |
generate_btn = gr.Button("Generate SOAP Notes", variant="primary")
|
127 |
|
128 |
with gr.Column():
|
129 |
+
with gr.Tab("Basic Model Output"):
|
130 |
base_thinking = gr.Textbox(label="Model Process", lines=3, interactive=False)
|
131 |
+
base_content = gr.Textbox(label="Basic SOAP Note", lines=20, interactive=False)
|
132 |
|
133 |
+
with gr.Tab("Detailed Model Output"):
|
134 |
novel_thinking = gr.Textbox(label="Model Process", lines=3, interactive=False)
|
135 |
+
novel_content = gr.Textbox(label="Detailed SOAP Note", lines=20, interactive=False)
|
136 |
|
137 |
# Connect the generate button
|
138 |
generate_btn.click(
|
|
|
305 |
inputs=[transcript]
|
306 |
)
|
307 |
|
308 |
+
# Information about production alternatives
|
309 |
+
with gr.Accordion("Options for a Production Version", open=False):
|
310 |
+
gr.Markdown("""
|
311 |
+
### Option 1: Use the Hugging Face Inference API
|
312 |
+
|
313 |
+
Instead of loading models locally, you can use the Hugging Face Inference API to make API calls to hosted models:
|
314 |
+
|
315 |
+
```python
|
316 |
+
from huggingface_hub import InferenceClient
|
317 |
+
|
318 |
+
client = InferenceClient("ClinicianFOCUS/Clinician-Note-2.0a")
|
319 |
+
|
320 |
+
def generate_with_api(prompt):
|
321 |
+
response = client.text_generation(
|
322 |
+
prompt,
|
323 |
+
max_new_tokens=1024,
|
324 |
+
temperature=0.3,
|
325 |
+
top_p=0.9,
|
326 |
+
top_k=20
|
327 |
+
)
|
328 |
+
return response
|
329 |
+
```
|
330 |
+
|
331 |
+
### Option 2: Use Pre-built llama-cpp-python Wheels
|
332 |
+
|
333 |
+
You can use pre-built wheels for llama-cpp-python to avoid building from source:
|
334 |
+
|
335 |
+
```
|
336 |
+
# In requirements.txt
|
337 |
+
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.38/llama_cpp_python-0.2.38-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
338 |
+
```
|
339 |
+
|
340 |
+
### Option 3: Use the Hugging Face Text Generation Interface (TGI)
|
341 |
+
|
342 |
+
TGI is an optimized inference server for text generation:
|
343 |
+
|
344 |
+
```python
|
345 |
+
import requests
|
346 |
+
|
347 |
+
API_URL = "https://api-inference.huggingface.co/models/ClinicianFOCUS/Clinician-Note-2.0a"
|
348 |
+
headers = {"Authorization": f"Bearer {API_TOKEN}"}
|
349 |
+
|
350 |
+
def query(payload):
|
351 |
+
response = requests.post(API_URL, headers=headers, json=payload)
|
352 |
+
return response.json()
|
353 |
+
|
354 |
+
output = query({
|
355 |
+
"inputs": "Your prompt here",
|
356 |
+
"parameters": {"max_new_tokens": 1024}
|
357 |
+
})
|
358 |
+
```
|
359 |
+
|
360 |
+
### Option 4: Use Smaller Models
|
361 |
+
|
362 |
+
You can use smaller models that fit within memory constraints, like:
|
363 |
+
- TinyLlama (1.1B parameters)
|
364 |
+
- Distilbert (66M parameters)
|
365 |
+
- FLAN-T5-small (80M parameters)
|
366 |
+
""")
|
367 |
+
|
368 |
if __name__ == "__main__":
|
369 |
demo.launch()
|