|
import os |
|
import spaces |
|
import gradio as gr |
|
|
|
import torch |
|
from peft import PeftModel |
|
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer |
|
from huggingface_hub import login as hf_login |
|
|
|
import xgrammar as xgr |
|
from pydantic import BaseModel |
|
|
|
# Authenticate against the Hugging Face Hub using the HF_TOKEN environment
# variable. The call is skipped when the variable is unset or empty: passing
# token=None makes `login` fall back to an interactive prompt, which cannot be
# answered in a headless deployment. Without a login, the downloads below rely
# on whatever credentials are already cached on the machine.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    hf_login(token=_hf_token)
|
|
|
# Hub identifiers: the base instruct model and the PEFT adapter applied on top.
model_name = "meta-llama/Llama-3.2-1B-Instruct"
checkpoint = "gregorlied/Llama-3.2-1B-Instruct-Medical-Report-Summarization"

# Prefer the GPU when one is visible to torch, otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base weights in full precision directly onto the chosen device.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map=device,
)

# Attach the adapter and merge it into the base weights, so `model` ends up
# being a plain causal LM without the PEFT wrapper.
model = PeftModel.from_pretrained(model, checkpoint).merge_and_unload()
|
|
|
# Output schema for the extraction: one free-text string per clinical
# category. The class is handed to the xgrammar compiler below, which
# constrains generation to JSON objects of this shape.
#
# Documented with comments rather than a docstring on purpose: pydantic copies
# a model's docstring into the JSON schema it generates, and the schema should
# stay exactly as it was.
class Person(BaseModel):
    # Patient background
    life_style: str
    family_history: str
    social_history: str
    medical_surgical_history: str

    # Presentation and work-up
    signs_symptoms: str
    comorbidities: str
    diagnostic_techniques_procedures: str
    diagnosis: str
    laboratory_values: str
    pathology: str

    # Treatment and outcome
    pharmacological_therapy: str
    interventional_therapy: str
    patient_outcome_assessment: str

    # Demographics (kept as strings, like every other field)
    age: str
    gender: str
|
|
|
# Tokenizer and model config of the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

# xgrammar needs the tokenizer's vocabulary; the vocabulary size is taken from
# the model config rather than from the tokenizer itself.
tokenizer_info = xgr.TokenizerInfo.from_huggingface(
    tokenizer,
    vocab_size=config.vocab_size,
)

# Compile the `Person` JSON schema into a grammar once at start-up.
grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
compiled_grammar = grammar_compiler.compile_json_schema(Person)

# NOTE(review): xgrammar documents its Hugging Face LogitsProcessor as
# single-use (one instance per `generate()` call). Reusing this module-level
# instance across requests is therefore suspect; building a fresh processor
# from `compiled_grammar` for every generation is the safer pattern.
xgr_logits_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar)
|
|
|
# Instruction text that `summarize` places in the system message. It describes
# the task, the output rules and the expected JSON keys, and ends with the
# "### Clinical Report" header.
#
# NOTE(review): the dash in the "Return only a valid JSON object" rule had been
# mis-decoded into a Greek beta; it is restored to an em dash here. Confirm it
# matches the wording the adapter was trained with.
# NOTE(review): the typo "multile" is left untouched on purpose. If the adapter
# was fine-tuned on this exact wording, correcting it would move the prompt
# away from the training distribution. Verify before fixing.
prompt = """You are a text extraction system for clinical reports.
Please extract relevant clinical information from the report.

### Instructions

- Use the JSON Schema given below.
- Return only a valid JSON object — no markdown, no comments.
- If no relevant facts are given for a field, set its value to "N/A".
- If multile relevant facts are given for a field, separate them with "; ".

### JSON Schema

{
    'life_style': '',
    'family_history': '',
    'social_history': '',
    'medical_surgical_history': '',
    'signs_symptoms': '',
    'comorbidities': '',
    'diagnostic_techniques_procedures': '',
    'diagnosis': '',
    'laboratory_values': '',
    'pathology': '',
    'pharmacological_therapy': '',
    'interventional_therapy': '',
    'patient_outcome_assessment': '',
    'age': '',
    'gender': '',
}

### Clinical Report
"""
|
|
|
@spaces.GPU(duration=60)
def summarize(text):
    """Extract structured clinical information from a free-text report.

    The report is wrapped in a chat conversation (system instructions plus the
    report as the user turn), and generation is constrained by the compiled
    JSON-schema grammar.

    Args:
        text: The clinical report entered by the user.

    Returns:
        The decoded model completion as a string, or a short hint asking for
        input when ``text`` is empty, whitespace-only or ``None``.
    """
    # Guard against missing input as well as blank input.
    if not text or not text.strip():
        return "Please enter some text to summarize."

    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": text},
    ]

    # Render the conversation with the model's chat template so that the
    # system instructions actually reach the model. Tokenizing the raw report
    # alone would silently drop them. Letting the template tokenize directly
    # also avoids adding the special start token a second time.
    model_inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device)

    # The xgrammar logits processor keeps per-generation state and is
    # documented as single-use, so a fresh instance is built for every request
    # instead of sharing one across calls.
    logits_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar)

    output_ids = model.generate(
        input_ids=model_inputs["input_ids"],
        attention_mask=model_inputs["attention_mask"],
        max_new_tokens=2048,
        logits_processor=[logits_processor],
    )

    # `generate` returns prompt + completion; keep only the newly generated
    # tokens before decoding.
    prompt_length = model_inputs["input_ids"].shape[1]
    completion_ids = output_ids[:, prompt_length:]

    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)
    return response[0]
|
|
|
# Gradio UI: report input, category overview, run button, output box and one
# worked example. Event wiring happens inside the Blocks context.
with gr.Blocks() as demo:
    # The heading previously carried a mis-decoded character where an emoji
    # had been; the original glyph is not recoverable, so it is left out.
    gr.Markdown("## Structured Information Extraction for Clinical Reports")

    with gr.Row():
        input_text = gr.Textbox(
            label="Clinical Report",
            autoscroll=False,
            lines=15,
            max_lines=15,
            placeholder="Paste your clinical report here...",
        )

    with gr.Row():
        # NOTE(review): this group is display-only for now. It is not passed
        # to `summarize`, so ticking boxes does not change the output.
        gr.CheckboxGroup(
            label="Categories",
            info="Categories of clinical information covered by the extraction schema.",
            choices=[
                "Life Style",
                "Family History",
                "Social History",
                "Medical Surgical History",
                "Signs and Symptoms",
                "Comorbidities",
                "Diagnostic Techniques and Procedures",
                "Diagnosis",
                "Laboratory Values",
                "Pathology",
                "Pharmacological Therapy",
                "Interventional Therapy",
                "Patient Outcome Assessment",
                "Age",
                "Gender",
            ],
        )

    with gr.Row():
        summarize_btn = gr.Button("Summarize")

    with gr.Row():
        output_text = gr.Textbox(
            label="Summary",
            autoscroll=False,
            lines=15,
            max_lines=15,
            show_copy_button=True,
        )

    with gr.Row():
        # A single sample report; its output is computed through `summarize`
        # and cached lazily.
        examples = gr.Examples(
            label="Examples",
            examples=[
                """A 48-year-old Indian woman was referred to the department of periodontology, with the chief complaint of swollen gums. She felt discomfort with the disfigurement of gums which appeared un esthetic due to its more severity and there was bleeding and difficulty while chewing food.
Past medical history revealed that she is under medication for hypertension with amlodipine (10 mg/day orally) for the last 2 years and 6 months. The amlodipine dose was increased to 50 mg/day orally and statins were prescribed due to the acute angina attack and hypercholesterolemia before 6 months of the dental visit.
Clinical examination was carried out by assessing the periodontal status by plaque index (PI), gingival index (GI), Russel's periodontal index. The patients oral hygiene status revealed the presence of more amount of plaque and some amount of calculus on both anterior and posterior surfaces of the teeth due to the presence of new niches for accumulation of plaque and microorganisms. There was generalized bleeding on probing and generalized probing depths ranging from 3 to 8 mm with greatest depths in relation to mandibular anteriors. Due to the outward enlargement of gingiva, there were no deep periodontal pockets.
Intraorally, there was generalized GO on the labial and lingual/palatal surface of the maxillary and mandibular teeth, which was more pronounced in the labial surfaces than the lingual and palatal surfaces. The interdental papillae were enlarged, fibrous, and lobulated in appearance mainly around the mandibular and maxillary anterior teeth [].
In this case report, photographic analysis by Ellis and Seymour was used for assessing the gingival encroachment or overgrowth on adjacent surfaces for a gingival unit (0 = no encroachment of interdental papilla on tooth surface, 1 = mild encroachment producing a blunted papilla tip, 2 = moderate encroachment involving lateral spread of papilla across buccal tooth surface of less than one quarter tooth width, 3 = marked encroachment of papilla, more than One-fourth tooth width with loss of interdental papilla form).[] In this case report, score 3 severity of gingival enlargement was observed.
Grade III mobility was observed in relation to mandibular anteriors. Generalized reddish color, purulent discharge in relation to mandibular anteriors and generalized bleeding on probing were observed due to the generalized inflammation of gingiva. Radiographic examination revealed generalized horizontal bone loss with more destruction of bone in relation to maxillary and mandibular anterior region [].
Blood sample was taken at the patients first visit to the dental hospital. Serum total CHO, HDL, low density lipoprotein (LDL), and triglycerides (TG) were determined by autoanalyzer in the clinical laboratory.
Several methods have been employed for the detection of putative periodontal pathogens in subgingival samples to identify the microbiologic profile of periodontitis. Here we had chosen a genetic microbiologic test to identify microbiologic profile in amlodipine induced gingival enlargement with CVD.
A paper point made of cellulose is introduced into the deep periodontal pocket. After 10 s, the point is withdrawn and placed in a RNA stabilizer buffer and sent for hybridization []. The IAI Pado Test 4.5 (IAI ESCHENWEG 6. CH-4528 ZUCHWIL/SWITZERLAND) which is a genetic test used in this case study allowed for the identification and quantification of bacteria which have a preponderant pathogenic role in periodontitis.
The IAI Pado Test 4.5 is a biologic molecular test which allowed the identification and quantification. The specific periodontal pathogens like Aggregatibacter actinomycetemcomitans (Aa), Porphyromonas gingivalis (Pg), Tannerella forsythia (Tf), and Treponema denticola (Td) were identified by this test.
Biopsy was taken during the surgical phase and sent to the laboratory. It revealed mixture of dense and loose fibrous components with the chronic inflammatory cell infiltrate in the connective tissue and elongation of rete pegs in the epithelium. On the basis of patient's history, clinical features, laboratory investigations for lipid profile, microbiologic profile, and biopsy reports, a diagnosis of amlodipine induced GO in a patient with CVD was made."""
            ],
            fn=summarize,
            inputs=input_text,
            outputs=output_text,
            cache_examples="lazy",
        )

    # Run the extraction when the button is pressed.
    summarize_btn.click(
        fn=summarize,
        inputs=input_text,
        outputs=output_text,
        show_progress=True,
    )
|
|
|
# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()