yunusajib committed on
Commit 5cc39c0 · verified · 1 Parent(s): db349e8

update app

Files changed (1)
  1. app.py +176 -104
app.py CHANGED
@@ -1,112 +1,184 @@
  import gradio as gr
- import torch
  import os
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
- from pyannote.audio import Pipeline as DiarizationPipeline
- import whisper
  import tempfile
- import shutil
  from pydub import AudioSegment
-
- # Load whisper model
- whisper_model = whisper.load_model("base") # Use "small" or "medium" if needed
-
- # Load summarization pipeline
- summarizer_tokenizer = AutoTokenizer.from_pretrained("t5-small")
- summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
- summarizer = pipeline("summarization", model=summarizer_model, tokenizer=summarizer_tokenizer)
-
- def convert_to_wav(input_path, output_path):
-     audio = AudioSegment.from_file(input_path)
-     audio.export(output_path, format="wav")
-
- def transcribe_audio(audio_path):
-     result = whisper_model.transcribe(audio_path, fp16=torch.cuda.is_available())
-     return result['text']
-
- def diarize_audio(audio_path, hf_token):
-     os.environ["HF_TOKEN"] = hf_token
-     diarization_pipeline = DiarizationPipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=hf_token)
-     diarization = diarization_pipeline(audio_path)
-     return diarization
-
- def combine_diarized_transcript(diarization, full_text):
-     # Basic speaker labeling using diarization and full text
-     # Note: This is a simplified alignment using time chunks only
-     chunks = []
-     for turn, _, speaker in diarization.itertracks(yield_label=True):
-         start, end = turn.start, turn.end
-         chunks.append(f"{speaker}: [from {start:.1f}s to {end:.1f}s]")
-     # Combine for display/demo
-     return "\n".join(chunks) + "\n" + full_text
-
- def summarize_text(text):
-     prefix = "summarize: " + text.strip()
-     inputs = summarizer_tokenizer.encode(prefix, return_tensors="pt", max_length=512, truncation=True)
-     summary_ids = summarizer_model.generate(inputs, max_length=100, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
-     return summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-
- def process_pipeline(audio_file, hf_token):
-     if not hf_token:
-         return "", "", "Error: HuggingFace token is required."
-
-     if not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
-         return "", "", "Error: Uploaded file is missing or empty."
-
-     # Step 1: Convert to WAV if needed
-     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
-         try:
-             sound = AudioSegment.from_file(audio_file)
-             sound.export(tmp_wav.name, format="wav")
-             tmp_path = tmp_wav.name
-         except Exception as e:
-             return "", "", f"Audio conversion failed: {str(e)}"
-
-     # Step 2: Transcription (Whisper)
-     try:
-         transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base", return_timestamps=True, device=0 if torch.cuda.is_available() else -1)
-         result = transcriber(tmp_path)
-         transcript = result["text"]
-     except Exception as e:
-         return "", "", f"Transcription failed: {str(e)}"
-
-     # Step 3: Summarization
      try:
-         summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
-         summary = summarizer(transcript, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
      except Exception as e:
-         return transcript, "", f"Summarization failed: {str(e)}"
-
-     return tmp_path, transcript, summary
-
- description = """
- ### 🩺 GP Consultation Summarizer (Demo App)
-
- This app:
- 1. Transcribes short consultation audio using Whisper
- 2. Identifies who spoke when using PyAnnote speaker diarization
- 3. Combines both into a labeled transcript
- 4. Generates a short summary using T5-small
-
- ⚠️ **Note:** Best for short consultations (under 5–6 minutes).
- ⚠️ You must provide your own Hugging Face token (required for diarization).
- """
-
- app = gr.Interface(
-     fn=process_pipeline,
-     inputs=[
-         gr.Audio(type="filepath", label="Upload Consultation Audio (.wav)"),
-         gr.Textbox(label="Your Hugging Face Token", type="password")
-     ],
-     outputs=[
-         gr.Textbox(label="Raw Transcript"),
-         gr.Textbox(label="Labeled Transcript (with Speaker Info)"),
-         gr.Textbox(label="Summary")
-     ],
-     title="GP Consultation Summarizer",
-     description=description,
-     allow_flagging="never"
- )

  if __name__ == "__main__":
-     app.launch(share=True)
 
  import gradio as gr
  import os
  import tempfile
+ import torch
  from pydub import AudioSegment
+ import whisper
+ from pyannote.audio import Pipeline
+ from pyannote.core import Segment
+ from lmdeploy import pipeline as lm_pipeline
+ from lmdeploy import GenerationConfig, TurbomindEngineConfig
+ from transformers import pipeline as hf_pipeline
+ from presidio_analyzer import AnalyzerEngine
+ from presidio_anonymizer import AnonymizerEngine
+
+ # --- Configuration ---
+ MEDICAL_NER_MODEL = "d4data/biomedical-ner-all"
+ WHISPER_MODEL_SIZE = "base" # "small" or "medium" for better accuracy
+ DEFAULT_HF_TOKEN = "your_huggingface_token_here" # Replace with your token
+
+ # --- Global Models ---
+ whisper_model = None
+ diarization_pipeline = None
+ med_ner = None
+ phi_analyzer = AnalyzerEngine()
+ phi_anonymizer = AnonymizerEngine()
+
+ qwen_models = {
+     "Qwen Medical 7B": "Qwen/Qwen2.5-7B-Instruct-1M",
+     "Qwen Fast 3B": "Qwen/Qwen2.5-3B-Instruct",
+ }
+
+ # --- Helper Functions ---
+ def load_models(hf_token):
+     """Load all required models"""
+     global whisper_model, diarization_pipeline, med_ner
+
+     # Load Whisper (Speech-to-Text)
+     if whisper_model is None:
+         whisper_model = whisper.load_model(WHISPER_MODEL_SIZE, device="cuda" if torch.cuda.is_available() else "cpu")
+
+     # Load Diarization
+     if diarization_pipeline is None:
+         diarization_pipeline = Pipeline.from_pretrained(
+             "pyannote/speaker-diarization",
+             use_auth_token=hf_token
+         )
+
+     # Load Medical NER
+     if med_ner is None:
+         med_ner = hf_pipeline("ner", model=MEDICAL_NER_MODEL, aggregation_strategy="simple")
+
+     return "Models loaded successfully"
+
+ def convert_audio_to_wav(input_file):
+     """Convert any audio file to 16kHz WAV format"""
+     audio = AudioSegment.from_file(input_file)
+     wav_path = os.path.join(tempfile.gettempdir(), "consultation.wav")
+     audio.set_frame_rate(16000).export(wav_path, format="wav")
+     return wav_path
+
+ def anonymize_phi(text):
+     """Remove personally identifiable health information"""
+     results = phi_analyzer.analyze(text=text, language="en")
+     anonymized = phi_anonymizer.anonymize(text, results)
+     return anonymized.text
+
+ # --- Core Processing Functions ---
+ def transcribe_and_diarize(audio_file, hf_token):
+     """Convert audio to text with speaker labels"""
      try:
+         # Convert audio
+         wav_path = convert_audio_to_wav(audio_file)
+
+         # Transcribe
+         transcript = whisper_model.transcribe(wav_path)["segments"]
+
+         # Diarize
+         diarization = diarization_pipeline(wav_path)
+
+         # Combine results
+         output = []
+         for seg in transcript:
+             start, end, text = seg["start"], seg["end"], seg["text"]
+             # diarization is a pyannote Annotation: label the segment with the speaker who talks most within it
+             turns = diarization.crop(Segment(start, end), mode="intersection")
+             speaker = turns.argmax() if turns else "SPEAKER_UNKNOWN"
+             output.append(f"[{start:.1f}s] {speaker}: {text}")
+
+         return "\n".join(output), transcript
+
      except Exception as e:
+         return f"Error: {str(e)}", None
+
+ def extract_medical_entities(text):
+     """Identify drugs, conditions, and procedures"""
+     entities = med_ner(text)
+     return {
+         "Drugs": [e["word"] for e in entities if e["entity_group"] == "DRUG"],
+         "Conditions": [e["word"] for e in entities if e["entity_group"] == "DISEASE"],
+         "Procedures": [e["word"] for e in entities if e["entity_group"] == "TREATMENT"]
+     }
+
+ def generate_soap_notes(transcript, model_choice, anonymize_phi_flag):
+     """Generate structured medical notes using Qwen"""
+     # Anonymize if requested
+     if anonymize_phi_flag:
+         transcript = anonymize_phi(transcript)
+
+     # Initialize Qwen
+     engine_config = TurbomindEngineConfig(
+         cache_max_entry_count=0.5,
+         session_len=131072
+     )
+
+     pipe = lm_pipeline(qwen_models[model_choice], backend_config=engine_config)
+
+     # Medical prompt template
+     system_prompt = """You are a clinical assistant. Convert this doctor-patient conversation into SOAP notes:
+     - Subjective: Patient-reported symptoms
+     - Objective: Clinician observations
+     - Assessment: Diagnosis/differential
+     - Plan: Treatment and follow-up"""
+
+     response = pipe([{
+         "role": "system",
+         "content": system_prompt
+     }, {
+         "role": "user",
+         "content": f"Consultation Transcript:\n{transcript}\n\nGenerate concise SOAP notes:"
+     }], GenerationConfig(max_new_tokens=1024))
+
+     return response.text
+
+ # --- Gradio Interface ---
+ with gr.Blocks(title="Clinical Consultation Summarizer", theme=gr.themes.Soft()) as app:
+     gr.Markdown("""# 🩺 Patient-Doctor Consultation Summarizer""")
+
+     with gr.Row():
+         with gr.Column():
+             audio_input = gr.Audio(
+                 sources=["upload", "microphone"],
+                 type="filepath",
+                 label="Upload Consultation Recording"
+             )
+             hf_token = gr.Textbox(
+                 label="Hugging Face Token",
+                 value=DEFAULT_HF_TOKEN,
+                 type="password"
+             )
+             model_choice = gr.Dropdown(
+                 choices=list(qwen_models.keys()),
+                 value="Qwen Medical 7B",
+                 label="Model"
+             )
+             anonymize_check = gr.Checkbox(
+                 label="Anonymize Protected Health Info (PHI)",
+                 value=True
+             )
+             process_btn = gr.Button("Process Consultation")
+
+         with gr.Column():
+             with gr.Tabs():
+                 with gr.Tab("Transcript"):
+                     transcript_output = gr.Textbox(label="Transcribed Conversation", lines=15)
+                 with gr.Tab("SOAP Notes"):
+                     soap_output = gr.Textbox(label="Clinical Summary", lines=15)
+                 with gr.Tab("Medical Entities"):
+                     entity_output = gr.JSON(label="Extracted Medical Terms")
+
+     # Processing
+     process_btn.click(
+         # load_models() returns a truthy status string, so an `or` chain would stop there;
+         # call it for its side effect, then return the transcription result
+         fn=lambda audio, token: (load_models(token), transcribe_and_diarize(audio, token))[1],
+         inputs=[audio_input, hf_token],
+         outputs=[transcript_output, gr.State()]
+     ).success(
+         fn=generate_soap_notes,
+         inputs=[transcript_output, model_choice, anonymize_check],
+         outputs=soap_output
+     ).success(
+         fn=extract_medical_entities,
+         inputs=transcript_output,
+         outputs=entity_output
+     )

  if __name__ == "__main__":
+     app.launch(server_port=7860, share=True)
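
The interface wiring above relies on Gradio event chaining: process_btn.click(...) returns an event object, and each .success(...) step runs only if the previous handler finished without raising, reading its inputs from components the earlier step populated. A minimal standalone sketch of that pattern (component names and lambdas here are illustrative, not part of this commit):

import gradio as gr

with gr.Blocks() as demo:
    text_in = gr.Textbox(label="Input")
    step1_out = gr.Textbox(label="Step 1: uppercase")
    step2_out = gr.Textbox(label="Step 2: length")
    run_btn = gr.Button("Run")

    # Step 2 fires only after step 1 succeeds, and reads step 1's output component.
    run_btn.click(
        fn=lambda s: s.upper(), inputs=text_in, outputs=step1_out
    ).success(
        fn=lambda s: f"{len(s)} characters", inputs=step1_out, outputs=step2_out
    )

if __name__ == "__main__":
    demo.launch()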