gaur3009 committed on
Commit
7ab5858
·
verified ·
1 Parent(s): 4d65b38

Update analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +31 -91
analyzer.py CHANGED
@@ -1,137 +1,77 @@
1
- # analyzer.py (Key Point Extraction)
2
- from transformers import pipeline
3
  import re
4
  from datetime import datetime, timedelta
5
- import config
6
-
7
- # Load NLP model
8
- summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
9
 
10
class MeetingAnalyzer:
    """Accumulate meeting-transcript chunks and extract a summary,
    action items, and decisions via regex pattern matching.

    NOTE(review): `generate_summary` relies on module-level names
    `summarizer` and `config` defined elsewhere in this file.
    """

    def __init__(self):
        self.transcript_chunks = []      # raw text chunks, in arrival order
        self.speakers = {}               # speaker name -> list of utterances
        self.current_speaker = "Unknown"
        self.action_items = []           # refreshed by extract_action_items()
        self.decisions = []              # refreshed by extract_decisions()

    def process_chunk(self, text_chunk):
        """Store a transcript chunk and attribute it to a speaker.

        A chunk shaped like "Name: utterance" is credited to Name;
        any other chunk is only appended to the transcript.
        """
        self.transcript_chunks.append(text_chunk)

        # Simple speaker detection: "Speaker: content".
        # split(":", 1) always yields two parts when ":" is present,
        # so no extra length check is needed.
        if ":" in text_chunk:
            speaker, content = text_chunk.split(":", 1)
            self.current_speaker = speaker.strip()
            self.speakers.setdefault(self.current_speaker, []).append(content.strip())

    def generate_summary(self):
        """Summarize the whole transcript with the loaded summarization model.

        Long transcripts are summarized in fixed-size character chunks and
        the partial summaries joined. Returns a placeholder string when the
        transcript is under 50 words.
        """
        full_text = " ".join(self.transcript_chunks)

        # If text is too short, skip summarization
        if len(full_text.split()) < 50:
            return "Not enough content for summary"

        # Generate summary in chunks for long meetings
        max_chunk_size = 1000
        chunks = [full_text[i:i + max_chunk_size]
                  for i in range(0, len(full_text), max_chunk_size)]

        summaries = []
        for chunk in chunks:
            summary = summarizer(
                chunk,
                max_length=config.SUMMARY_MAX_LENGTH,
                min_length=config.SUMMARY_MIN_LENGTH,
                do_sample=False
            )[0]['summary_text']
            summaries.append(summary)

        return " ".join(summaries)

    def extract_action_items(self):
        """Scan the transcript for action-item phrases.

        Returns a list of {"task", "owner", "deadline"} dicts and also
        stores it on self.action_items so detect_urgent_action_items()
        operates on the latest extraction.
        """
        full_text = " ".join(self.transcript_chunks)
        action_items = []

        # (pattern, task group, owner group, deadline group); None means
        # the pattern does not capture that field. Mapping each field to
        # its actual capture group fixes the original positional mapping,
        # which used the full match as "owner" for 3-group patterns and
        # discarded the captured assignee for the 4-group pattern.
        patterns = [
            (r"(\bwill\b.*?\bby\b\s+\w+\s+\d{1,2})", 1, None, None),
            (r"\baction\b:\s*(.*?)(?:\bdeadline\b|\bby\b|\bfor\b)\s*(\w+\s+\d{1,2})", 1, None, 2),
            (r"\btodo\b:\s*(.*?)(?:\bdue\b|\bby\b)\s*(\w+\s+\d{1,2})", 1, None, 2),
            (r"\bassign(?:ed)? to\b\s+(\w+):\s*(.*?)(?:\bdeadline\b|\bby\b)\s*(\w+\s+\d{1,2})", 2, 1, 3),
        ]

        for pattern, task_g, owner_g, deadline_g in patterns:
            for match in re.finditer(pattern, full_text, re.IGNORECASE):
                task = match.group(task_g)
                owner = match.group(owner_g) if owner_g is not None else "Unassigned"
                deadline = match.group(deadline_g) if deadline_g is not None else "ASAP"
                action_items.append({
                    "task": task.strip(),
                    "owner": owner.strip(),
                    "deadline": self.normalize_deadline(deadline.strip()),
                })

        # Keep instance state in sync so urgency detection sees the result.
        self.action_items = action_items
        return action_items

    def detect_urgent_action_items(self):
        """Return items whose task mentions "urgent" or whose deadline is ASAP."""
        return [
            item for item in self.action_items
            if "urgent" in item['task'].lower() or "asap" in item['deadline'].lower()
        ]

    def extract_decisions(self):
        """Scan the transcript for decision phrases and return them."""
        full_text = " ".join(self.transcript_chunks)
        decisions = []

        # Pattern matching for decisions; each captures up to sentence end.
        patterns = [
            r"\bdecided to\b (.*?)[\.\n]",
            r"\bagreed that\b (.*?)[\.\n]",
            r"\bconsensus is\b (.*?)[\.\n]",
            r"\bresolution\b: (.*?)[\.\n]",
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, full_text, re.IGNORECASE):
                decisions.append(match.group(1).strip())

        self.decisions = decisions
        return decisions

    def normalize_deadline(self, deadline_str):
        """Convert relative deadline phrases to YYYY-MM-DD.

        Unrecognized strings (e.g. "March 5") are passed through unchanged.
        """
        today = datetime.now()
        lower_str = deadline_str.lower()

        # "eod" (end of day) and "today" both resolve to today's date.
        if "today" in lower_str or "eod" in lower_str:
            return today.strftime("%Y-%m-%d")
        if "tomorrow" in lower_str:
            return (today + timedelta(days=1)).strftime("%Y-%m-%d")
        if "next week" in lower_str:
            return (today + timedelta(weeks=1)).strftime("%Y-%m-%d")
        if "eow" in lower_str:
            # End of week = next Friday (weekday(): 0 = Monday, 4 = Friday).
            days_ahead = 4 - today.weekday()
            if days_ahead <= 0:  # already Friday or the weekend
                days_ahead += 7
            return (today + timedelta(days=days_ahead)).strftime("%Y-%m-%d")

        return deadline_str
 
1
+ # analyzer.py (Meeting Analysis)
 
2
  import re
3
  from datetime import datetime, timedelta
 
 
 
 
4
 
5
class MeetingAnalyzer:
    """Accumulate meeting-transcript chunks and provide a lightweight,
    regex-based summary, action-item and decision extraction (no ML model).
    """

    def __init__(self):
        self.transcript_chunks = []   # raw text chunks, in arrival order
        self.action_items = []        # filled incrementally by process_chunk()
        self.decisions = []           # reserved for decision tracking

    def process_chunk(self, text_chunk):
        """Store a transcript chunk and flag obvious action items.

        Any chunk mentioning "action item" or "todo" is recorded verbatim
        as an unassigned, ASAP action item.
        """
        self.transcript_chunks.append(text_chunk)

        # Simple action item detection
        if "action item" in text_chunk.lower() or "todo" in text_chunk.lower():
            self.action_items.append({
                "task": text_chunk,
                "owner": "Unassigned",
                "deadline": "ASAP"
            })

    def generate_summary(self):
        """Return the first three sentences of the transcript as a summary.

        Returns a placeholder while the transcript is under 50 words.
        """
        full_text = " ".join(self.transcript_chunks)

        # If text is too short, skip summarization
        if len(full_text.split()) < 50:
            return "Meeting in progress... summary will appear here"

        # Simple summary - just the first few sentences
        sentences = full_text.split('. ')
        summary = ". ".join(sentences[:3])
        # Fix: only append a period when one isn't already there, so a
        # transcript ending in "." isn't summarized as "...beta.."
        return summary if summary.endswith(".") else summary + "."

    def extract_action_items(self):
        """Extract action items from the transcript via regex.

        Each pattern captures up to the end of the sentence. The original
        patterns ended in a lazy `(.*?)` at pattern end, which always
        matched the empty string and produced empty task content.
        """
        action_items = []
        full_text = " ".join(self.transcript_chunks)

        # Look for assignment patterns; group 1 is the task text.
        patterns = [
            r"\bassign\b[^.\n]*?\bto\b\s+([^.\n]+)",
            r"\baction item\b:\s*([^.\n]+)",
            r"\btodo\b:\s*([^.\n]+)",
            r"\bplease\b\s+([^.\n]+)",
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, full_text, re.IGNORECASE):
                action_items.append({
                    "task": match.group(1).strip(),
                    "owner": "Unassigned",
                    "deadline": "ASAP"
                })

        return action_items

    def detect_urgent_action_items(self):
        """Return recorded action items whose task text looks urgent."""
        urgent_items = []
        for item in self.action_items:
            if "urgent" in item['task'].lower() or "asap" in item['task'].lower():
                urgent_items.append(item)
        return urgent_items

    def extract_decisions(self):
        """Extract decision statements from the transcript via regex."""
        decisions = []

        # Look for decision patterns; each captures up to sentence end.
        patterns = [
            r"\bdecided to\b (.*?)[\.\n]",
            r"\bagreed that\b (.*?)[\.\n]",
            r"\bconsensus is\b (.*?)[\.\n]"
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, " ".join(self.transcript_chunks), re.IGNORECASE):
                decisions.append(match.group(1).strip())

        return decisions