gregorlied committed · verified
Commit 78f48b0 · Parent(s): 58306e1

Update app.py

Files changed (1):
  1. app.py +78 -34
app.py CHANGED
@@ -3,10 +3,10 @@ import spaces
 import gradio as gr
 
 import torch
-from transformers import AutoTokenizer
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 from huggingface_hub import login as hf_login
 
-from vllm import LLM
+import xgrammar as xgr
 from pydantic import BaseModel
 
 os.environ["VLLM_LOGGING_LEVEL"]="DEBUG"
@@ -16,35 +16,70 @@ hf_login(token=os.getenv("HF_TOKEN"))
 
 model_name = "meta-llama/Llama-3.2-1B-Instruct"
 
-model = LLM(
-    model=model_name,
-    dtype=torch.bfloat16,
-    trust_remote_code=True,
-    enforce_eager=True,
+model = AutoModelForCausalLM.from_pretrained(
+    model_name, torch_dtype=torch.float32, device_map="auto"
 )
 
-class Info(BaseModel):
-    name: str
-    age: int
-
-json_schema = Info.model_json_schema()
-guided_decoding_params = GuidedDecodingParams(json=json_schema)
-sampling_params = SamplingParams(
-    temperature=0.1,
-    max_tokens=2048,
-    guided_decoding=guided_decoding_params,
-)
-
-prompt = "You are a helpful assistant."
+class Person(BaseModel):
+    life_style: str
+    family_history: str
+    social_history: str
+    medical_surgical_history: str
+    signs_symptoms: str
+    comorbidities: str
+    diagnostic_techniques_procedures: str
+    diagnosis: str
+    laboratory_values: str
+    pathology: str
+    pharmacological_therapy: str
+    interventional_therapy: str
+    patient_outcome_assessment: str
+    age: str
+    gender: str
+
+config = AutoConfig.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    padding_side='right',
-    trust_remote_code=True,
+tokenizer_info = xgr.TokenizerInfo.from_huggingface(
+    tokenizer, vocab_size=config.vocab_size
 )
 
-if tokenizer.pad_token is None:
-    tokenizer.add_special_tokens({'pad_token': '<pad>'})
+grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
+compiled_grammar = grammar_compiler.compile_json_schema(Person)
+xgr_logits_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar)
+
+prompt = """You are a text extraction system for clinical reports.
+Please extract relevant clinical information from the report.
+
+### Instructions
+
+- Use the JSON Schema given below.
+- Return only a valid JSON object – no markdown, no comments.
+- If no relevant facts are given for a field, set its value to "N/A".
+- If multiple relevant facts are given for a field, separate them with "; ".
+
+### JSON Schema
+
+{
+    'life_style': '',
+    'family_history': '',
+    'social_history': '',
+    'medical_surgical_history': '',
+    'signs_symptoms': '',
+    'comorbidities': '',
+    'diagnostic_techniques_procedures': '',
+    'diagnosis': '',
+    'laboratory_values': '',
+    'pathology': '',
+    'pharmacological_therapy': '',
+    'interventional_therapy': '',
+    'patient_outcome_assessment': '',
+    'age': '',
+    'gender': '',
+}
+
+### Clinical Report
+"""
 
 @spaces.GPU(duration=60)
 def summarize(text):
@@ -56,16 +91,25 @@ def summarize(text):
         {"role": "user", "content": text},
     ]
 
-    input_text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True,
-        enable_thinking=False,
+    model_inputs = tokenizer([text], return_tensors="pt").to('cuda')
+
+    generated_ids = model.generate(
+        input_ids=model_inputs["input_ids"],
+        attention_mask=model_inputs["attention_mask"],
+        # num_beams=8,
+        # top_p=0.9,
+        # do_sample=True,
+        # temperature=0.6,
+        max_new_tokens=2048,
+        logits_processor=[xgr_logits_processor]
     )
+
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
 
-    outputs = model.generate([input_text], sampling_params)
-    prediction = outputs[0].outputs[0].text
-    return prediction
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+    return response[0]
 
 with gr.Blocks() as demo:
     gr.Markdown("## 📝 Summarization for News, SciTLDR and Dialog Texts")