Spaces:

Yermia
/

meeting-minutes-ai

Sleeping

App Files Files Community

Yermia committed 24 days ago

Commit

5da9a16

1 Parent(s): b162d3c

First init

Browse files

Files changed (6) hide show

app.py +158 -0
models/config.py +37 -0
requirements.txt +22 -0
utils/output_generator.py +265 -0
utils/speech_processor.py +134 -0
utils/text_processor.py +226 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import gradio as gr
+import torch
+from utils.speech_processor import SpeechProcessor
+from utils.text_processor import TextProcessor
+from utils.output_generator import OutputGenerator
+import tempfile
+import os
# Build the three pipeline stages once, at import time; process_meeting()
# reuses these module-level instances on every call.
speech_processor, text_processor, output_generator = (
    SpeechProcessor(),
    TextProcessor(),
    OutputGenerator(),
)
def process_meeting(audio_file, language="id", summary_ratio=0.3):
    """Run the full audio-to-minutes pipeline for one meeting recording.

    Args:
        audio_file: Path of the recording; forwarded to
            ``speech_processor.process_audio``.
        language: Language code forwarded to the speech processor.
        summary_ratio: Ratio forwarded to
            ``text_processor.summarize_transcript``.

    Returns:
        A 5-tuple taken from the ``markdown``, ``json``,
        ``transcript_table``, ``action_items_table`` and
        ``decisions_table`` entries produced by
        ``output_generator.generate_all_formats``.

    Raises:
        gr.Error: If no audio was supplied or any pipeline stage fails.
            Gradio only shows an error to the user when ``gr.Error`` is
            *raised*; merely constructing it has no effect.
    """
    if not audio_file:
        raise gr.Error("Error: tidak ada audio yang diunggah")

    try:
        # Step 1: speech processing (diarization + transcription)
        gr.Info("🎤 Memproses audio...")
        transcript_with_speakers = speech_processor.process_audio(
            audio_file,
            language=language
        )

        # Step 2: text processing and summarization
        gr.Info("📝 Membuat ringkasan...")
        summary = text_processor.summarize_transcript(
            transcript_with_speakers,
            ratio=summary_ratio
        )

        # Step 3: information extraction
        gr.Info("🔍 Mengekstrak informasi penting...")
        extracted_info = text_processor.extract_key_information(
            transcript_with_speakers
        )

        # Step 4: render every output format
        gr.Info("📄 Membuat notulensi...")
        outputs = output_generator.generate_all_formats(
            transcript_with_speakers,
            summary,
            extracted_info
        )
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user and keep
        # the original exception chained for the server log.
        raise gr.Error(f"Error: {str(e)}") from e

    return (
        outputs['markdown'],
        outputs['json'],
        outputs['transcript_table'],
        outputs['action_items_table'],
        outputs['decisions_table']
    )
+# Gradio Interface
# Gradio interface. Components are created inside nested Row/Column
# contexts, so statement order here determines the on-screen layout.
with gr.Blocks(title="🤖 AI Meeting Minutes Generator") as demo:
    gr.Markdown("""
    # 🤖 AI Meeting Minutes Generator
    Upload audio rapat Anda dan dapatkan notulensi otomatis dengan:
    - 🎯 Identifikasi pembicara
    - 📝 Ringkasan otomatis
    - ✅ Action items
    - 📊 Keputusan penting
    """)

    # Inputs: the recording plus the two processing options.
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                label="Upload Audio Rapat",
                type="filepath",
                sources=["upload", "microphone"]
            )
            with gr.Row():
                language = gr.Dropdown(
                    choices=[
                        ("Indonesia", "id"),
                        ("English", "en")
                    ],
                    value="id",
                    label="Bahasa"
                )
                summary_ratio = gr.Slider(
                    minimum=0.1,
                    maximum=0.5,
                    value=0.3,
                    step=0.05,
                    label="Rasio Ringkasan"
                )
            process_btn = gr.Button("🚀 Proses Audio", variant="primary")

    # Output: minutes preview and the downloadable JSON file.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📄 Notulensi (Markdown)")
            markdown_output = gr.Textbox(
                label="Preview Notulensi",
                lines=20,
                max_lines=30
            )
            json_download = gr.File(
                label="📥 Download JSON"
            )

    # Output: full speaker-attributed transcript.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📊 Transkrip Lengkap")
            transcript_table = gr.Dataframe(
                headers=["Waktu", "Pembicara", "Teks"],
                label="Transkrip dengan Pembicara"
            )

    # Output: action items and decisions, side by side.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### ✅ Action Items")
            action_items_table = gr.Dataframe(
                headers=["Action Item", "Penanggung Jawab", "Timestamp"],
                label="Daftar Action Items"
            )
        with gr.Column():
            gr.Markdown("### 📌 Keputusan")
            decisions_table = gr.Dataframe(
                headers=["Keputusan", "Pembicara", "Timestamp"],
                label="Daftar Keputusan"
            )

    # The outputs list must stay in the same order as the tuple returned
    # by process_meeting.
    process_btn.click(
        fn=process_meeting,
        inputs=[audio_input, language, summary_ratio],
        outputs=[
            markdown_output,
            json_download,
            transcript_table,
            action_items_table,
            decisions_table
        ]
    )

    # Examples: only offer samples whose audio file actually exists.
    # NOTE(review): a missing example file appears to make gr.Examples fail
    # while the UI is being built, which would stop the app from starting.
    example_rows = [
        ["examples/meeting_sample_id.wav", "id", 0.3],
        ["examples/meeting_sample_en.wav", "en", 0.25]
    ]
    available_examples = [
        row for row in example_rows if os.path.exists(row[0])
    ]
    if available_examples:
        gr.Examples(
            examples=available_examples,
            inputs=[audio_input, language, summary_ratio]
        )

if __name__ == "__main__":
    demo.launch()

models/config.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import os
+from dataclasses import dataclass
from typing import List, Optional  # needed only for the annotation below


@dataclass
class ModelConfig:
    """Model names and processing parameters for the meeting-minutes pipeline.

    Raises:
        ValueError: From ``__post_init__`` when the speaker bounds or the
            chunking parameters contradict each other.
    """

    # Whisper ASR
    whisper_model: str = "openai/whisper-medium"
    whisper_language: str = "id"
    # Speaker diarization
    diarization_model: str = "pyannote/speaker-diarization-3.1"
    min_speakers: int = 1
    max_speakers: int = 10
    # Text processing
    summarization_model: str = "bert-base-multilingual-cased"
    ner_model: str = "cahya/bert-base-indonesian-NER"
    keyword_model: str = "paraphrase-multilingual-MiniLM-L12-v2"
    # Processing parameters
    chunk_size: int = 3000
    chunk_overlap: int = 200
    summary_ratio: float = 0.3
    max_summary_sentences: int = 6
    # Output formats; None is a sentinel replaced in __post_init__ so that
    # every instance gets its own list (no shared mutable default).
    output_formats: Optional[List[str]] = None

    def __post_init__(self):
        if self.output_formats is None:
            self.output_formats = ["markdown", "json", "html"]

        # Reject combinations that cannot be satisfied.
        if self.min_speakers > self.max_speakers:
            raise ValueError(
                "min_speakers must not be greater than max_speakers"
            )
        if not 0 <= self.chunk_overlap < self.chunk_size:
            raise ValueError(
                "chunk_overlap must be >= 0 and smaller than chunk_size"
            )

        # Hugging Face token read from the environment. It is set here
        # rather than declared as a field, so it is not part of the
        # generated __init__/__repr__/__eq__.
        self.hf_token = os.environ.get("HF_TOKEN", None)


# Global config instance
config = ModelConfig()

requirements.txt ADDED Viewed

	@@ -0,0 +1,22 @@

+# Core dependencies
+gradio==4.19.2
+transformers==4.37.2
+torch==2.1.2
+torchaudio==2.1.2
+# Audio processing
+pyannote.audio==3.1.1
+speechbrain==0.5.16
+librosa==0.10.1
+pydub==0.25.1
+# NLP
+keybert==0.8.3
+bert-extractive-summarizer==0.10.1
+nltk==3.8.1
+sentencepiece==0.1.99
+# Utils
+pandas==2.1.4
+markdown==3.5.2
+python-dotenv==1.0.0

utils/output_generator.py ADDED Viewed

	@@ -0,0 +1,265 @@

+import json
+import pandas as pd
+from datetime import datetime
+import tempfile
class OutputGenerator:
    """Render a processed meeting into Markdown, a JSON file and tables.

    Throughout this class a *transcript* is a list of segment dicts that
    are read through the keys ``speaker``, ``start``, ``end`` and ``text``.
    """

    def __init__(self):
        # Static wrapper templates; both contain a ``{content}`` placeholder.
        self.templates = {
            'markdown': self._load_markdown_template(),
            'html': self._load_html_template()
        }

    def generate_all_formats(self, transcript, summary, extracted_info):
        """Build every output the UI needs for one meeting.

        Args:
            transcript: List of transcript segment dicts.
            summary: Summary text placed in the minutes.
            extracted_info: Mapping with the keys ``keywords``,
                ``action_items`` and ``decisions``.

        Returns:
            dict with the keys ``markdown`` (str), ``json`` (path of the
            written JSON file), ``transcript_table``,
            ``action_items_table`` and ``decisions_table`` (DataFrames).
        """
        # Take one timestamp so the date and time can never disagree
        # (e.g. when the call straddles midnight).
        now = datetime.now()
        action_items = extracted_info['action_items']
        decisions = extracted_info['decisions']

        meeting_data = {
            'date': now.strftime('%d %B %Y'),
            'time': now.strftime('%H:%M'),
            'duration': self._calculate_duration(transcript),
            'participants': self._extract_participants(transcript),
            'summary': summary,
            'keywords': extracted_info['keywords'],
            'action_items': action_items,
            'decisions': decisions,
            'transcript': transcript
        }

        return {
            'markdown': self._generate_markdown(meeting_data),
            'json': self._generate_json(meeting_data),
            'transcript_table': self._generate_transcript_table(transcript),
            'action_items_table': self._generate_action_items_table(
                action_items
            ),
            'decisions_table': self._generate_decisions_table(decisions)
        }

    def _generate_markdown(self, data):
        """Return the meeting minutes as a Markdown string."""
        markdown = f"""# 📋 Notulensi Rapat - {data['date']}
## 📊 Informasi Rapat
- **Tanggal**: {data['date']}
- **Waktu**: {data['time']}
- **Durasi**: {data['duration']}
- **Peserta**: {', '.join(data['participants'])}
## 📝 Ringkasan Eksekutif
{data['summary']}
## 🎯 Topik Utama
{self._format_keywords(data['keywords'])}
## ✅ Action Items
{self._format_action_items_md(data['action_items'])}
## 📌 Keputusan Penting
{self._format_decisions_md(data['decisions'])}
## 💬 Transkrip Lengkap
{self._format_transcript_md(data['transcript'])}
---
*Dokumen ini dihasilkan secara otomatis menggunakan AI Meeting Minutes Generator*
"""
        return markdown

    def _generate_json(self, data):
        """Write the meeting data to a temporary ``.json`` file.

        Returns:
            Path of the written file. The file is created with
            ``delete=False``, so it outlives this call.
        """
        json_data = {
            'metadata': {
                'generated_at': datetime.now().isoformat(),
                'version': '1.0'
            },
            'meeting_info': {
                'date': data['date'],
                'duration': data['duration'],
                'participants': data['participants']
            },
            'content': {
                'summary': data['summary'],
                # Keywords are (phrase, score) pairs; keep the top five
                # phrases only.
                'keywords': [kw[0] for kw in data['keywords'][:5]],
                'action_items': [
                    {
                        'description': item['text'],
                        'assigned_to': item['speaker'],
                        'timestamp': item['timestamp'],
                        'mentioned_persons': item['entities']['persons'],
                        'mentioned_dates': item['entities']['dates']
                    }
                    for item in data['action_items']
                ],
                'decisions': [
                    {
                        'description': dec['text'],
                        'made_by': dec['speaker'],
                        'timestamp': dec['timestamp']
                    }
                    for dec in data['decisions']
                ]
            },
            'full_transcript': [
                {
                    'speaker': seg['speaker'],
                    'start_time': seg['start'],
                    'end_time': seg['end'],
                    'text': seg['text']
                }
                for seg in data['transcript']
            ]
        }

        # ensure_ascii=False emits raw non-ASCII characters, so the file
        # must be opened as UTF-8 explicitly; the platform default encoding
        # may be unable to encode them. The with-block closes the file even
        # if serialisation fails.
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='.json',
            delete=False,
            encoding='utf-8'
        ) as temp_file:
            json.dump(json_data, temp_file, indent=2, ensure_ascii=False)
        return temp_file.name

    def _generate_transcript_table(self, transcript):
        """Return the transcript as a DataFrame (time range, speaker, text)."""
        rows = [
            [
                f"{seg['start']:.1f}s - {seg['end']:.1f}s",
                seg['speaker'],
                seg['text']
            ]
            for seg in transcript
        ]
        return pd.DataFrame(rows, columns=['Waktu', 'Pembicara', 'Teks'])

    def _generate_action_items_table(self, action_items):
        """Return the action items as a DataFrame.

        The responsible column lists the persons mentioned in the item, or
        the speaker when no person was detected.
        """
        rows = []
        for item in action_items:
            assignees = item['entities']['persons'] or [item['speaker']]
            rows.append([
                item['text'],
                ', '.join(assignees),
                item['timestamp']
            ])
        return pd.DataFrame(
            rows,
            columns=['Action Item', 'Penanggung Jawab', 'Timestamp']
        )

    def _generate_decisions_table(self, decisions):
        """Return the decisions as a DataFrame (text, speaker, timestamp)."""
        rows = [
            [dec['text'], dec['speaker'], dec['timestamp']]
            for dec in decisions
        ]
        return pd.DataFrame(
            rows,
            columns=['Keputusan', 'Pembicara', 'Timestamp']
        )

    # Helper methods

    def _calculate_duration(self, transcript):
        """Format the end time of the last segment as ``M:SS`` or ``H:MM:SS``.

        Returns ``"0:00"`` for an empty transcript.
        """
        if not transcript:
            return "0:00"
        total_seconds = transcript[-1]['end']
        hours = int(total_seconds // 3600)
        minutes = int((total_seconds % 3600) // 60)
        seconds = int(total_seconds % 60)
        if hours > 0:
            return f"{hours}:{minutes:02d}:{seconds:02d}"
        return f"{minutes}:{seconds:02d}"

    def _extract_participants(self, transcript):
        """Return the distinct speaker labels, sorted."""
        return sorted({seg['speaker'] for seg in transcript})

    def _format_keywords(self, keywords):
        """Format the top five (phrase, score) pairs as a Markdown list."""
        return '\n'.join([f"- **{kw[0]}** (score: {kw[1]:.2f})"
                         for kw in keywords[:5]])

    def _format_action_items_md(self, action_items):
        """Format action items as a numbered Markdown list."""
        if not action_items:
            return "*Tidak ada action items yang terdeteksi*"
        formatted = []
        for i, item in enumerate(action_items, 1):
            assignees = item['entities']['persons'] or [item['speaker']]
            formatted.append(f"{i}. {item['text']}\n   - **Penanggung Jawab**: {', '.join(assignees)}\n   - **Waktu**: {item['timestamp']}")
        return '\n\n'.join(formatted)

    def _format_decisions_md(self, decisions):
        """Format decisions as a numbered Markdown list."""
        if not decisions:
            return "*Tidak ada keputusan yang terdeteksi*"
        formatted = []
        for i, dec in enumerate(decisions, 1):
            formatted.append(f"{i}. {dec['text']}\n   - **Diputuskan oleh**: {dec['speaker']}\n   - **Waktu**: {dec['timestamp']}")
        return '\n\n'.join(formatted)

    def _format_transcript_md(self, transcript):
        """Format the transcript as Markdown block quotes.

        A speaker heading is emitted only when the speaker changes.
        """
        formatted = []
        current_speaker = None
        for seg in transcript:
            if seg['speaker'] != current_speaker:
                formatted.append(f"\n**{seg['speaker']}** ({seg['start']:.1f}s):")
                current_speaker = seg['speaker']
            formatted.append(f"> {seg['text']}")
        return '\n'.join(formatted)

    def _load_markdown_template(self):
        """Return the Markdown wrapper template (customisable)."""
        return """# Meeting Minutes Template
{content}
"""

    def _load_html_template(self):
        """Return the HTML wrapper template with inline CSS."""
        return """<!DOCTYPE html>
<html>
<head>
    <style>
        body { font-family: Arial, sans-serif; margin: 40px; }
        h1 { color: #333; }
        .metadata { background: #f0f0f0; padding: 15px; border-radius: 5px; }
        .action-item { background: #e8f5e9; padding: 10px; margin: 10px 0; border-left: 4px solid #4caf50; }
        .decision { background: #e3f2fd; padding: 10px; margin: 10px 0; border-left: 4px solid #2196f3; }
    </style>
</head>
<body>
{content}
</body>
</html>"""

utils/speech_processor.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import torch
+import torchaudio
+from transformers import (
+    WhisperProcessor,
+    WhisperForConditionalGeneration,
+    pipeline
+)
+from pyannote.audio import Pipeline
+import librosa
+import numpy as np
+from pydub import AudioSegment
+import tempfile
class SpeechProcessor:
    """Speaker-attributed speech-to-text.

    Speaker turns come from a pyannote diarization pipeline; each turn is
    transcribed with Whisper.
    """

    # Sample rate the audio is resampled to before it is given to Whisper.
    WHISPER_SAMPLE_RATE = 16000
    # The Whisper feature extractor pads or truncates every input to a
    # 30-second window, so longer audio is transcribed window by window.
    WHISPER_WINDOW_SECONDS = 30

    def __init__(self):
        # Local import: `os` is not among the imports visible at the top of
        # this file, so the HF_TOKEN lookup below would otherwise raise
        # NameError.
        import os

        # Load Whisper for ASR
        self.whisper_processor = WhisperProcessor.from_pretrained(
            "openai/whisper-medium"
        )
        self.whisper_model = WhisperForConditionalGeneration.from_pretrained(
            "openai/whisper-medium"
        )
        # Load speaker diarization; the token is read from the environment.
        self.diarization_pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            use_auth_token=os.environ.get("HF_TOKEN")
        )

    def process_audio(self, audio_path, language="id"):
        """Diarize and transcribe one audio file.

        Args:
            audio_path: Path of the audio file; non-WAV input is converted
                to a temporary WAV first.
            language: Language code passed to Whisper.

        Returns:
            List of dicts with ``start``, ``end`` (seconds, rounded to two
            decimals), ``speaker`` and ``text``; consecutive turns of the
            same speaker are merged.
        """
        import os  # see __init__: not imported at file level

        wav_path = self._ensure_wav_format(audio_path)
        try:
            waveform, sample_rate = torchaudio.load(wav_path)
            diarization = self.diarization_pipeline(wav_path)

            transcript_segments = []
            for turn, _, speaker in diarization.itertracks(yield_label=True):
                # Cut this speaker turn out of the full waveform.
                start_sample = int(turn.start * sample_rate)
                end_sample = int(turn.end * sample_rate)
                segment_waveform = waveform[:, start_sample:end_sample]
                if segment_waveform.shape[-1] == 0:
                    # Nothing to transcribe for a zero-length turn.
                    continue

                text = self._transcribe_segment(
                    segment_waveform,
                    sample_rate,
                    language
                )
                transcript_segments.append({
                    "start": round(turn.start, 2),
                    "end": round(turn.end, 2),
                    "speaker": speaker,
                    "text": text
                })
        finally:
            # Remove the temporary WAV created by the conversion. This is
            # best-effort: a failed delete must not mask the real result
            # or error.
            if wav_path != audio_path:
                try:
                    os.remove(wav_path)
                except OSError:
                    pass

        return self._merge_consecutive_segments(transcript_segments)

    def _transcribe_segment(self, waveform, sample_rate, language):
        """Transcribe one audio segment with Whisper.

        Args:
            waveform: Tensor as sliced from ``torchaudio.load`` output
                (channels first), or an already-mono 1-D tensor.
            sample_rate: Sample rate of ``waveform``.
            language: Language code for the decoder prompt.

        Returns:
            The stripped transcription; ``""`` for an empty segment.
        """
        # Downmix to mono. squeeze() alone leaves stereo audio
        # two-dimensional, which Whisper would not read as one recording.
        if waveform.dim() > 1:
            waveform = waveform.mean(dim=0)

        target_rate = self.WHISPER_SAMPLE_RATE
        if sample_rate != target_rate:
            resampler = torchaudio.transforms.Resample(sample_rate, target_rate)
            waveform = resampler(waveform)

        forced_decoder_ids = self.whisper_processor.get_decoder_prompt_ids(
            language=language,
            task="transcribe"
        )

        # Transcribe in 30-second windows so that long turns are not cut
        # off after the first window. Windows are split at fixed offsets,
        # so a word on a boundary may be clipped.
        window = self.WHISPER_WINDOW_SECONDS * target_rate
        pieces = []
        for offset in range(0, waveform.shape[-1], window):
            chunk = waveform[offset:offset + window]
            input_features = self.whisper_processor(
                chunk.numpy(),
                sampling_rate=target_rate,
                return_tensors="pt"
            ).input_features
            predicted_ids = self.whisper_model.generate(
                input_features,
                forced_decoder_ids=forced_decoder_ids,
                max_length=448
            )
            text = self.whisper_processor.batch_decode(
                predicted_ids,
                skip_special_tokens=True
            )[0].strip()
            if text:
                pieces.append(text)
        return ' '.join(pieces)

    def _ensure_wav_format(self, audio_path):
        """Return a WAV path for ``audio_path``, converting when needed.

        A path ending in ``.wav`` (any letter case) is returned unchanged.
        Anything else is decoded with pydub and exported to a new
        temporary ``.wav`` file whose path is returned.
        """
        if audio_path.lower().endswith('.wav'):
            return audio_path

        audio = AudioSegment.from_file(audio_path)
        # NamedTemporaryFile creates the file atomically; tempfile.mktemp
        # is deprecated because the name it returns can be claimed by
        # another process before it is used.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as handle:
            wav_path = handle.name
        audio.export(wav_path, format='wav')
        return wav_path

    def _merge_consecutive_segments(self, segments):
        """Merge adjacent segments of the same speaker.

        Two neighbouring segments are merged when they have the same
        speaker and the gap between them is below one second. The input
        dicts are not modified; the result holds copies.
        """
        if not segments:
            return segments

        merged = [dict(segments[0])]
        for current in segments[1:]:
            last = merged[-1]
            same_speaker = last['speaker'] == current['speaker']
            if same_speaker and current['start'] - last['end'] < 1.0:
                last['end'] = current['end']
                # strip() avoids stray spaces when either side is empty.
                last['text'] = f"{last['text']} {current['text']}".strip()
            else:
                merged.append(dict(current))
        return merged

utils/text_processor.py ADDED Viewed

	@@ -0,0 +1,226 @@

+from transformers import (
+    AutoTokenizer,
+    AutoModelForSeq2SeqLM,
+    AutoModelForTokenClassification,
+    pipeline
+)
+from keybert import KeyBERT
+from summarizer import Summarizer
+import re
+import nltk
# Fetch the Punkt sentence tokenizer only when it is not already available,
# so importing this module does not attempt a download every time.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
class TextProcessor:
    """Summarize a transcript and extract keywords, action items, decisions.

    Transcript segments are dicts read through the keys ``text``,
    ``speaker`` and ``start``.
    """

    # Action-item cues. The leading \b matters: without it "akan" also
    # matches inside words such as "digunakan" or "melakukan", which flags
    # almost every sentence as an action item.
    ACTION_PATTERNS = (
        r"\bakan\s+(\w+)",
        r"\bharus\s+(\w+)",
        r"\bperlu\s+(\w+)",
        r"\bmohon\s+(\w+)",
        r"\btolong\s+(\w+)",
        r"\bsegera\s+(\w+)",
        r"\bfollow\s*up\b",
        r"\baction\s*item",
        r"\bto\s*do\b",
        r"\bdeadline\b",
    )

    # Decision cues.
    DECISION_PATTERNS = (
        r"(diputuskan|memutuskan)\s+(.+)",
        r"(disepakati|menyepakati)\s+(.+)",
        r"(setuju|persetujuan)\s+(.+)",
        r"keputusan(?:nya)?\s+(.+)",
        r"final(?:isasi)?\s+(.+)",
    )

    # First words that mark a sentence as an imperative.
    IMPERATIVE_VERBS = (
        'lakukan', 'buat', 'siapkan', 'kirim', 'hubungi',
        'follow', 'prepare', 'send', 'contact', 'create',
    )

    def __init__(self):
        # Extractive summarization model
        self.summarizer = Summarizer('bert-base-multilingual-cased')
        # KeyBERT for keyword extraction
        self.kw_model = KeyBERT('paraphrase-multilingual-MiniLM-L12-v2')
        # NER, used to find persons/organizations in action items
        self.ner_pipeline = pipeline(
            "ner",
            model="cahya/bert-base-indonesian-NER",
            aggregation_strategy="simple"
        )
        # Stop words for keyword extraction (None when unavailable)
        self.stop_words = self._load_stop_words()
        # Kept as instance attributes so callers can still adjust them.
        self.action_patterns = list(self.ACTION_PATTERNS)
        self.decision_patterns = list(self.DECISION_PATTERNS)

    @staticmethod
    def _load_stop_words():
        """Return Indonesian + English stop words from NLTK, or ``None``.

        The corpus is downloaded once if it is missing; ``None`` (no
        stop-word filtering) is returned when it still cannot be loaded.
        """
        from nltk.corpus import stopwords

        for attempt in range(2):
            try:
                words = set(stopwords.words('indonesian'))
                words.update(stopwords.words('english'))
                return sorted(words)
            except LookupError:
                if attempt == 0:
                    nltk.download('stopwords')
        return None

    def summarize_transcript(self, transcript_segments, ratio=0.3):
        """Summarize a transcript; long transcripts are summarized in two levels.

        Args:
            transcript_segments: List of transcript segment dicts.
            ratio: Fraction of sentences to keep, subject to a fixed cap
                on the number of sentences.

        Returns:
            The summary text; ``""`` when the transcript has no text.
        """
        full_text = ' '.join(seg['text'] for seg in transcript_segments)
        chunks = self._create_chunks(full_text)
        if not chunks:
            return ""
        if len(chunks) == 1:
            # Short document: summarize directly.
            return self._summarize(chunks[0], ratio, max_sentences=5)
        return self._hierarchical_summarization(chunks, ratio)

    def _summarize(self, text, ratio, max_sentences):
        """Summarize ``text`` to about ``ratio`` of its sentences.

        The summarizer ignores ``ratio`` whenever ``num_sentences`` is
        given, so the target count is derived from ``ratio`` here and
        capped at ``max_sentences``; at least one sentence is requested.
        """
        sentence_count = len(nltk.sent_tokenize(text))
        target = min(max_sentences, max(int(sentence_count * ratio), 1))
        return self.summarizer(text, num_sentences=target)

    def extract_key_information(self, transcript_segments):
        """Extract keywords, action items and decisions.

        Returns:
            dict with ``keywords`` (list of (phrase, score) pairs),
            ``action_items`` and ``decisions`` (lists of dicts with
            ``text``, ``speaker``, ``timestamp``; action items also carry
            ``entities``).
        """
        full_text = ' '.join(seg['text'] for seg in transcript_segments)

        keywords = []
        if full_text.strip():
            # stop_words must be a list (or None): the string 'indonesian'
            # is not a built-in scikit-learn stop list, and KeyBERT turns
            # the resulting ValueError into an empty keyword list.
            keywords = self.kw_model.extract_keywords(
                full_text,
                keyphrase_ngram_range=(1, 3),
                stop_words=self.stop_words,
                top_n=10,
                use_mmr=True,
                diversity=0.5
            )

        action_items = []
        decisions = []
        for segment in transcript_segments:
            text = segment['text']
            timestamp = f"{segment['start']:.1f}s"
            if self._is_action_item(text):
                action_items.append({
                    'text': text,
                    'speaker': segment['speaker'],
                    'timestamp': timestamp,
                    'entities': self._extract_entities(text)
                })
            # A segment may be both an action item and a decision.
            if self._is_decision(text):
                decisions.append({
                    'text': text,
                    'speaker': segment['speaker'],
                    'timestamp': timestamp
                })

        return {
            'keywords': keywords,
            'action_items': action_items,
            'decisions': decisions
        }

    def _create_chunks(self, text, max_length=3000):
        """Split ``text`` into overlapping chunks of whole sentences.

        A chunk is closed when adding the next sentence would exceed
        ``max_length`` characters; the last two sentences of a closed chunk
        (if it had more than two) start the next one. Returns ``[]`` for
        text without sentences.
        """
        chunks = []
        current_chunk = []
        current_length = 0
        for sentence in nltk.sent_tokenize(text):
            sentence_length = len(sentence)
            if current_length + sentence_length > max_length and current_chunk:
                chunks.append(' '.join(current_chunk))
                # Keep the last two sentences as overlap.
                current_chunk = current_chunk[-2:] if len(current_chunk) > 2 else []
                current_length = sum(len(s) for s in current_chunk)
            current_chunk.append(sentence)
            current_length += sentence_length
        if current_chunk:
            chunks.append(' '.join(current_chunk))
        return chunks

    def _hierarchical_summarization(self, chunks, ratio):
        """Summarize each chunk, then summarize the joined chunk summaries."""
        # Level 1: a higher ratio keeps more detail for the second pass.
        chunk_summaries = [
            self._summarize(chunk, 0.4, max_sentences=4)
            for chunk in chunks
        ]
        # Level 2: summarize the summaries.
        combined_summary = ' '.join(chunk_summaries)
        if not combined_summary.strip():
            return ""
        return self._summarize(combined_summary, ratio, max_sentences=6)

    def _is_action_item(self, text):
        """Return True when ``text`` looks like an action item.

        That is the case when any action pattern matches, or when the
        first word (ignoring surrounding punctuation) is an imperative
        verb.
        """
        text_lower = text.lower()
        if any(re.search(pattern, text_lower) for pattern in self.action_patterns):
            return True

        words = text_lower.split()
        if not words:
            return False
        first_word = words[0].strip('.,!?:;"\'()')
        return first_word in self.IMPERATIVE_VERBS

    def _is_decision(self, text):
        """Return True when any decision pattern matches ``text``."""
        text_lower = text.lower()
        return any(
            re.search(pattern, text_lower)
            for pattern in self.decision_patterns
        )

    def _extract_entities(self, text):
        """Return persons, organizations and date mentions found in ``text``."""
        entities = self.ner_pipeline(text)
        return {
            'persons': [e['word'] for e in entities if e['entity_group'] == 'PER'],
            'organizations': [e['word'] for e in entities if e['entity_group'] == 'ORG'],
            'dates': self._extract_dates(text)
        }

    def _extract_dates(self, text):
        """Return the date mentions in ``text``, lower-cased.

        Results are grouped by pattern: numeric dates, day names, relative
        expressions, then month names. Matches are whole words only, and
        "minggu depan" is reported once as a relative expression rather
        than also as the day name "minggu".
        """
        date_patterns = [
            r'\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b',
            r'\b(senin|selasa|rabu|kamis|jumat|sabtu|minggu(?!\s+depan))\b',
            r'\b(besok|lusa|minggu\s+depan|bulan\s+depan)\b',
            r'\b(januari|februari|maret|april|mei|juni|juli|agustus|september|oktober|november|desember)\b'
        ]
        text_lower = text.lower()
        dates = []
        for pattern in date_patterns:
            dates.extend(re.findall(pattern, text_lower))
        return dates