Update analyzer.py
Browse files- analyzer.py +31 -91
analyzer.py
CHANGED
@@ -1,137 +1,77 @@
|
|
1 |
-
# analyzer.py (
|
2 |
-
from transformers import pipeline
|
3 |
import re
|
4 |
from datetime import datetime, timedelta
|
5 |
-
import config
|
6 |
-
|
7 |
-
# Load NLP model
|
8 |
-
summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
|
9 |
|
10 |
class MeetingAnalyzer:
|
11 |
def __init__(self):
|
12 |
self.transcript_chunks = []
|
13 |
-
self.speakers = {}
|
14 |
-
self.current_speaker = "Unknown"
|
15 |
self.action_items = []
|
16 |
self.decisions = []
|
17 |
|
18 |
def process_chunk(self, text_chunk):
|
19 |
self.transcript_chunks.append(text_chunk)
|
20 |
|
21 |
-
# Simple
|
22 |
-
if "
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
self.speakers[self.current_speaker] = []
|
29 |
-
self.speakers[self.current_speaker].append(content.strip())
|
30 |
|
31 |
def generate_summary(self):
|
32 |
full_text = " ".join(self.transcript_chunks)
|
33 |
|
34 |
# If text is too short, skip summarization
|
35 |
if len(full_text.split()) < 50:
|
36 |
-
return "
|
37 |
-
|
38 |
-
# Generate summary in chunks for long meetings
|
39 |
-
max_chunk_size = 1000
|
40 |
-
chunks = [full_text[i:i+max_chunk_size] for i in range(0, len(full_text), max_chunk_size)]
|
41 |
-
|
42 |
-
summaries = []
|
43 |
-
for chunk in chunks:
|
44 |
-
summary = summarizer(
|
45 |
-
chunk,
|
46 |
-
max_length=config.SUMMARY_MAX_LENGTH,
|
47 |
-
min_length=config.SUMMARY_MIN_LENGTH,
|
48 |
-
do_sample=False
|
49 |
-
)[0]['summary_text']
|
50 |
-
summaries.append(summary)
|
51 |
|
52 |
-
|
|
|
|
|
53 |
|
54 |
def extract_action_items(self):
|
55 |
-
|
56 |
action_items = []
|
57 |
|
58 |
-
#
|
59 |
patterns = [
|
60 |
-
r"(\
|
61 |
-
r"(\baction\b
|
62 |
-
r"(\btodo\b
|
63 |
-
r"(\
|
64 |
]
|
65 |
|
66 |
for pattern in patterns:
|
67 |
-
for match in re.finditer(pattern,
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
deadline = "ASAP"
|
75 |
-
elif len(groups) == 3:
|
76 |
-
task = groups[1]
|
77 |
-
owner = groups[0]
|
78 |
-
deadline = groups[2]
|
79 |
-
else:
|
80 |
-
task = groups[0]
|
81 |
-
owner = "Unassigned"
|
82 |
-
deadline = "ASAP"
|
83 |
-
|
84 |
-
action_items.append({
|
85 |
-
"task": task.strip(),
|
86 |
-
"owner": owner.strip(),
|
87 |
-
"deadline": self.normalize_deadline(deadline.strip())
|
88 |
-
})
|
89 |
|
90 |
return action_items
|
91 |
|
92 |
def detect_urgent_action_items(self):
|
93 |
urgent_items = []
|
94 |
for item in self.action_items:
|
95 |
-
if "urgent" in item['task'].lower() or "asap" in item['
|
96 |
urgent_items.append(item)
|
97 |
return urgent_items
|
98 |
|
99 |
def extract_decisions(self):
|
100 |
-
full_text = " ".join(self.transcript_chunks)
|
101 |
decisions = []
|
102 |
|
103 |
-
#
|
104 |
patterns = [
|
105 |
r"\bdecided to\b (.*?)[\.\n]",
|
106 |
r"\bagreed that\b (.*?)[\.\n]",
|
107 |
-
r"\bconsensus is\b (.*?)[\.\n]"
|
108 |
-
r"\bresolution\b: (.*?)[\.\n]"
|
109 |
]
|
110 |
|
111 |
for pattern in patterns:
|
112 |
-
for match in re.finditer(pattern,
|
113 |
-
|
114 |
-
decisions.append(decision)
|
115 |
-
|
116 |
-
return decisions
|
117 |
-
|
118 |
-
def normalize_deadline(self, deadline_str):
|
119 |
-
today = datetime.now()
|
120 |
-
lower_str = deadline_str.lower()
|
121 |
-
|
122 |
-
if "today" in lower_str:
|
123 |
-
return today.strftime("%Y-%m-%d")
|
124 |
-
elif "tomorrow" in lower_str:
|
125 |
-
return (today + timedelta(days=1)).strftime("%Y-%m-%d")
|
126 |
-
elif "next week" in lower_str:
|
127 |
-
return (today + timedelta(weeks=1)).strftime("%Y-%m-%d")
|
128 |
-
elif "eod" in lower_str:
|
129 |
-
return today.strftime("%Y-%m-%d")
|
130 |
-
elif "eow" in lower_str:
|
131 |
-
# Find next Friday
|
132 |
-
days_ahead = 4 - today.weekday() # 0 = Monday, 4 = Friday
|
133 |
-
if days_ahead <= 0: # If today is Friday or weekend
|
134 |
-
days_ahead += 7
|
135 |
-
return (today + timedelta(days=days_ahead)).strftime("%Y-%m-%d")
|
136 |
|
137 |
-
return
|
|
|
1 |
+
# analyzer.py (Meeting Analysis)
|
|
|
2 |
import re
|
3 |
from datetime import datetime, timedelta
|
|
|
|
|
|
|
|
|
4 |
|
5 |
class MeetingAnalyzer:
    """Accumulate meeting transcript text and derive simple insights from it.

    The analyzer is purely rule-based (string checks and regular
    expressions); it keeps every chunk it is given in
    ``transcript_chunks`` and works on the space-joined transcript.

    Attributes:
        transcript_chunks: Raw text chunks in the order they were received.
        action_items: Items recorded live by ``process_chunk``. Note that
            ``extract_action_items`` returns its own list and does not
            modify this attribute.
        decisions: Initialised empty; not written by any method of this
            class.
    """

    _PLACEHOLDER_SUMMARY = "Meeting in progress... summary will appear here"
    _MIN_SUMMARY_WORDS = 50
    _SUMMARY_SENTENCES = 3

    # Each pattern captures the trigger phrase plus the rest of the sentence
    # (up to, but not including, the next period or newline).  A trailing
    # lazy group with nothing after it would always match the empty string
    # and drop the actual task text.
    _ACTION_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r"\bassign\b[^.\n]*? to [^.\n]+",
            r"\baction item: [^.\n]*",
            r"\btodo: [^.\n]*",
            r"\bplease [^.\n]*",
        )
    )

    # The decision text ends at a period, a newline, or the end of the
    # transcript, so a final unpunctuated sentence is still picked up.
    _DECISION_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r"\bdecided to (.*?)(?:[.\n]|$)",
            r"\bagreed that (.*?)(?:[.\n]|$)",
            r"\bconsensus is (.*?)(?:[.\n]|$)",
        )
    )

    def __init__(self):
        """Create an analyzer with an empty transcript and no findings."""
        self.transcript_chunks = []
        self.action_items = []
        self.decisions = []

    def _full_text(self):
        """Return all transcript chunks joined by single spaces."""
        return " ".join(self.transcript_chunks)

    def process_chunk(self, text_chunk):
        """Store a transcript chunk and flag it if it mentions an action item.

        Args:
            text_chunk: A piece of transcript text.

        A chunk containing "action item" or "todo" (case-insensitive) is
        appended to ``self.action_items`` as a whole, with owner
        "Unassigned" and deadline "ASAP".
        """
        self.transcript_chunks.append(text_chunk)

        # Simple action item detection; lowercase once for both checks.
        lowered = text_chunk.lower()
        if "action item" in lowered or "todo" in lowered:
            self.action_items.append({
                "task": text_chunk,
                "owner": "Unassigned",
                "deadline": "ASAP",
            })

    def generate_summary(self):
        """Return a naive summary: the first few sentences of the transcript.

        Returns:
            A placeholder message while the transcript has fewer than 50
            whitespace-separated words; otherwise the first three
            sentences (split on ". "), terminated by exactly one
            punctuation mark.
        """
        full_text = self._full_text()

        # If text is too short, skip summarization.
        if len(full_text.split()) < self._MIN_SUMMARY_WORDS:
            return self._PLACEHOLDER_SUMMARY

        sentences = full_text.split(". ")
        summary = ". ".join(sentences[:self._SUMMARY_SENTENCES]).rstrip()
        # Only add the closing period when the last kept sentence does not
        # already carry terminal punctuation (avoids a doubled "..").
        if not summary.endswith((".", "!", "?")):
            summary += "."
        return summary

    def extract_action_items(self):
        """Scan the whole transcript for phrases that look like action items.

        Returns:
            A list of dicts with keys ``task`` (the matched phrase through
            the end of its sentence), ``owner`` (always "Unassigned") and
            ``deadline`` (always "ASAP"). Results are grouped by pattern,
            in transcript order within each pattern; a sentence that
            triggers several patterns is reported once per pattern.
        """
        full_text = self._full_text()
        action_items = []

        for pattern in self._ACTION_PATTERNS:
            for match in pattern.finditer(full_text):
                action_items.append({
                    "task": match.group(0).strip(),
                    "owner": "Unassigned",
                    "deadline": "ASAP",
                })

        return action_items

    def detect_urgent_action_items(self):
        """Return the recorded action items whose task text signals urgency.

        Returns:
            The items from ``self.action_items`` whose ``task`` contains
            "urgent" or "asap" (case-insensitive), in their original order.
        """
        urgent_items = []
        for item in self.action_items:
            task_text = item["task"].lower()
            if "urgent" in task_text or "asap" in task_text:
                urgent_items.append(item)
        return urgent_items

    def extract_decisions(self):
        """Scan the whole transcript for statements that record a decision.

        Returns:
            A list of stripped strings: the text following "decided to",
            "agreed that" or "consensus is" (case-insensitive) up to the
            end of that sentence. Results are grouped by pattern, in
            transcript order within each pattern.
        """
        full_text = self._full_text()
        decisions = []

        for pattern in self._DECISION_PATTERNS:
            for match in pattern.finditer(full_text):
                decisions.append(match.group(1).strip())

        return decisions
|