gaur3009 committed on
Commit
b45ba66
·
verified ·
1 Parent(s): 4b0e845

Create analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +135 -0
analyzer.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # analyzer.py (Key Point Extraction)
2
+ from transformers import pipeline
3
+ import re
4
+ from datetime import datetime, timedelta
5
+ import config
6
+
7
+ # Load the Hugging Face "summarization" pipeline once, at import time, using the philschmid/bart-large-cnn-samsum checkpoint; MeetingAnalyzer.generate_summary calls this module-level object.
8
+ summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
9
+
10
class MeetingAnalyzer:
    """Accumulate meeting transcript text and extract key points from it.

    Transcript chunks are fed in through :meth:`process_chunk`; the other
    methods work on the concatenation of every chunk received so far.

    Attributes are documented where they are initialised in ``__init__``.
    """

    # Action-item patterns. Named groups are used so that every pattern
    # exposes the same fields regardless of how many groups it contains:
    #   task     - always present
    #   owner    - only captured by the "assigned to <name>:" form
    #   deadline - "<word> <1-2 digits>", e.g. "March 15"
    _ACTION_PATTERNS = (
        # "... will <do something> by March 15": the whole phrase is the task.
        re.compile(
            r"(?P<task>\bwill\b.*?\bby\b\s+(?P<deadline>\w+\s+\d{1,2}))",
            re.IGNORECASE,
        ),
        re.compile(
            r"\baction\b:\s*(?P<task>.*?)(?:\bdeadline\b|\bby\b|\bfor\b)"
            r"\s*(?P<deadline>\w+\s+\d{1,2})",
            re.IGNORECASE,
        ),
        re.compile(
            r"\btodo\b:\s*(?P<task>.*?)(?:\bdue\b|\bby\b)"
            r"\s*(?P<deadline>\w+\s+\d{1,2})",
            re.IGNORECASE,
        ),
        re.compile(
            r"\bassign(?:ed)? to\b\s+(?P<owner>\w+):\s*(?P<task>.*?)"
            r"(?:\bdeadline\b|\bby\b)\s*(?P<deadline>\w+\s+\d{1,2})",
            re.IGNORECASE,
        ),
    )

    # Decision patterns. A decision runs up to the next period or newline,
    # or to the end of the transcript when the last sentence is unterminated.
    _DECISION_PATTERNS = (
        re.compile(r"\bdecided to\b (.*?)(?:[.\n]|$)", re.IGNORECASE),
        re.compile(r"\bagreed that\b (.*?)(?:[.\n]|$)", re.IGNORECASE),
        re.compile(r"\bconsensus is\b (.*?)(?:[.\n]|$)", re.IGNORECASE),
        re.compile(r"\bresolution\b: (.*?)(?:[.\n]|$)", re.IGNORECASE),
    )

    def __init__(self):
        """Create an analyzer with an empty transcript."""
        # Raw transcript chunks, in arrival order.
        self.transcript_chunks = []
        # Speaker name -> list of that speaker's utterances.
        self.speakers = {}
        # Speaker of the most recent "Name: text" chunk.
        self.current_speaker = "Unknown"
        # Result of the most recent extract_action_items() call.
        self.action_items = []
        # Result of the most recent extract_decisions() call.
        self.decisions = []

    def process_chunk(self, text_chunk):
        """Record a transcript chunk and attribute it to a speaker if possible.

        A chunk containing ``:`` is treated as ``"<speaker>: <content>"``,
        split on the first colon. Chunks without a colon are stored in the
        transcript only.

        Args:
            text_chunk: A piece of transcript text.
        """
        self.transcript_chunks.append(text_chunk)

        # Simple speaker detection: everything before the first colon is
        # taken to be the speaker's name.
        if ":" in text_chunk:
            speaker, content = text_chunk.split(":", 1)
            self.current_speaker = speaker.strip()
            self.speakers.setdefault(self.current_speaker, []).append(
                content.strip()
            )

    @staticmethod
    def _chunk_text(text, max_chars):
        """Split *text* into whitespace-joined pieces of at most *max_chars*.

        Pieces are broken only between words so no word is cut in half. A
        single word longer than *max_chars* becomes its own (oversized) piece.
        """
        pieces = []
        words = []
        length = 0
        for word in text.split():
            # One extra character for the joining space, except for the
            # first word of a piece.
            needed = len(word) + (1 if words else 0)
            if words and length + needed > max_chars:
                pieces.append(" ".join(words))
                words = [word]
                length = len(word)
            else:
                words.append(word)
                length += needed
        if words:
            pieces.append(" ".join(words))
        return pieces

    def generate_summary(self, max_chunk_size=1000):
        """Summarise the transcript collected so far.

        The transcript is split into pieces of at most *max_chunk_size*
        characters (on word boundaries), each piece is passed to the
        module-level ``summarizer`` with the length limits from ``config``,
        and the per-piece summaries are joined with spaces.

        Args:
            max_chunk_size: Maximum number of characters per piece.

        Returns:
            The combined summary, or ``"Not enough content for summary"``
            when the transcript has fewer than 50 words.
        """
        full_text = " ".join(self.transcript_chunks)

        # If text is too short, skip summarization.
        if len(full_text.split()) < 50:
            return "Not enough content for summary"

        summaries = [
            summarizer(
                chunk,
                max_length=config.SUMMARY_MAX_LENGTH,
                min_length=config.SUMMARY_MIN_LENGTH,
                do_sample=False,
            )[0]["summary_text"]
            for chunk in self._chunk_text(full_text, max_chunk_size)
        ]
        return " ".join(summaries)

    def extract_action_items(self):
        """Find action items in the transcript using pattern matching.

        Results are ordered by pattern, then by position in the text. The
        same sentence may be reported more than once if several patterns
        match it. The result is also stored in ``self.action_items`` so that
        :meth:`detect_urgent_action_items` can use it.

        Returns:
            A list of dicts with keys ``"task"``, ``"owner"`` (``"Unassigned"``
            when the pattern names no owner) and ``"deadline"`` (passed
            through :meth:`normalize_deadline`; ``"ASAP"`` when none was
            captured).
        """
        full_text = " ".join(self.transcript_chunks)
        action_items = []

        for pattern in self._ACTION_PATTERNS:
            for match in pattern.finditer(full_text):
                found = match.groupdict()
                owner = found.get("owner") or "Unassigned"
                deadline = found.get("deadline") or "ASAP"
                action_items.append({
                    "task": found["task"].strip(),
                    "owner": owner.strip(),
                    "deadline": self.normalize_deadline(deadline.strip()),
                })

        self.action_items = action_items
        return action_items

    def detect_urgent_action_items(self):
        """Return the stored action items that look urgent.

        An item is urgent when its task mentions "urgent" or its deadline
        mentions "asap" (both case-insensitive). Operates on
        ``self.action_items``, which :meth:`extract_action_items` fills in.
        """
        return [
            item
            for item in self.action_items
            if "urgent" in item["task"].lower()
            or "asap" in item["deadline"].lower()
        ]

    def extract_decisions(self):
        """Find decisions in the transcript using pattern matching.

        Recognises "decided to ...", "agreed that ...", "consensus is ..."
        and "resolution: ...". Results are ordered by pattern, then by
        position, and are also stored in ``self.decisions``.

        Returns:
            A list of decision strings (empty captures are skipped).
        """
        full_text = " ".join(self.transcript_chunks)
        decisions = []

        for pattern in self._DECISION_PATTERNS:
            for match in pattern.finditer(full_text):
                decision = match.group(1).strip()
                if decision:
                    decisions.append(decision)

        self.decisions = decisions
        return decisions

    def normalize_deadline(self, deadline_str, today=None):
        """Convert a relative deadline phrase to a ``YYYY-MM-DD`` date string.

        Recognised phrases (case-insensitive substring match, checked in this
        order): "today", "tomorrow", "next week" (+7 days), "eod" (today) and
        "eow" (the coming Friday; on a Friday, Saturday or Sunday this is the
        Friday of the following week).

        Args:
            deadline_str: The deadline text to normalise.
            today: Reference ``datetime``; defaults to ``datetime.now()``.

        Returns:
            The ISO-formatted date for a recognised phrase, otherwise
            *deadline_str* unchanged.
        """
        if today is None:
            today = datetime.now()
        lower_str = deadline_str.lower()

        if "today" in lower_str:
            offset = timedelta(0)
        elif "tomorrow" in lower_str:
            offset = timedelta(days=1)
        elif "next week" in lower_str:
            offset = timedelta(weeks=1)
        elif "eod" in lower_str:
            offset = timedelta(0)
        elif "eow" in lower_str:
            # weekday(): 0 = Monday, 4 = Friday.
            days_ahead = 4 - today.weekday()
            if days_ahead <= 0:  # Friday or weekend: use next week's Friday
                days_ahead += 7
            offset = timedelta(days=days_ahead)
        else:
            return deadline_str

        return (today + offset).strftime("%Y-%m-%d")