gaur3009 committed on
Commit
7ab5858
·
verified ·
1 Parent(s): 4d65b38

Update analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +31 -91
analyzer.py CHANGED
@@ -1,137 +1,77 @@
1
- # analyzer.py (Key Point Extraction)
2
- from transformers import pipeline
3
  import re
4
  from datetime import datetime, timedelta
5
- import config
6
-
7
- # Load NLP model
8
- summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")
9
 
10
class MeetingAnalyzer:
    """Accumulate meeting-transcript chunks and extract a summary,
    action items, and decisions via regex pattern matching.

    NOTE(review): `generate_summary` relies on module-level names
    `summarizer` and `config` defined elsewhere in this file.
    """

    def __init__(self):
        self.transcript_chunks = []      # raw text chunks, in arrival order
        self.speakers = {}               # speaker name -> list of utterances
        self.current_speaker = "Unknown"
        self.action_items = []           # refreshed by extract_action_items()
        self.decisions = []              # refreshed by extract_decisions()

    def process_chunk(self, text_chunk):
        """Store a transcript chunk and attribute it to a speaker.

        A chunk shaped like "Name: utterance" is credited to Name;
        any other chunk is only appended to the transcript.
        """
        self.transcript_chunks.append(text_chunk)

        # Simple speaker detection: "Speaker: content".
        # split(":", 1) always yields two parts when ":" is present,
        # so no extra length check is needed.
        if ":" in text_chunk:
            speaker, content = text_chunk.split(":", 1)
            self.current_speaker = speaker.strip()
            self.speakers.setdefault(self.current_speaker, []).append(content.strip())

    def generate_summary(self):
        """Summarize the whole transcript with the loaded summarization model.

        Long transcripts are summarized in fixed-size character chunks and
        the partial summaries joined. Returns a placeholder string when the
        transcript is under 50 words.
        """
        full_text = " ".join(self.transcript_chunks)

        # If text is too short, skip summarization
        if len(full_text.split()) < 50:
            return "Not enough content for summary"

        # Generate summary in chunks for long meetings
        max_chunk_size = 1000
        chunks = [full_text[i:i + max_chunk_size]
                  for i in range(0, len(full_text), max_chunk_size)]

        summaries = []
        for chunk in chunks:
            summary = summarizer(
                chunk,
                max_length=config.SUMMARY_MAX_LENGTH,
                min_length=config.SUMMARY_MIN_LENGTH,
                do_sample=False
            )[0]['summary_text']
            summaries.append(summary)

        return " ".join(summaries)

    def extract_action_items(self):
        """Scan the transcript for action-item phrases.

        Returns a list of {"task", "owner", "deadline"} dicts and also
        stores it on self.action_items so detect_urgent_action_items()
        operates on the latest extraction.
        """
        full_text = " ".join(self.transcript_chunks)
        action_items = []

        # (pattern, task group, owner group, deadline group); None means
        # the pattern does not capture that field. Mapping each field to
        # its actual capture group fixes the original positional mapping,
        # which used the full match as "owner" for 3-group patterns and
        # discarded the captured assignee for the 4-group pattern.
        patterns = [
            (r"(\bwill\b.*?\bby\b\s+\w+\s+\d{1,2})", 1, None, None),
            (r"\baction\b:\s*(.*?)(?:\bdeadline\b|\bby\b|\bfor\b)\s*(\w+\s+\d{1,2})", 1, None, 2),
            (r"\btodo\b:\s*(.*?)(?:\bdue\b|\bby\b)\s*(\w+\s+\d{1,2})", 1, None, 2),
            (r"\bassign(?:ed)? to\b\s+(\w+):\s*(.*?)(?:\bdeadline\b|\bby\b)\s*(\w+\s+\d{1,2})", 2, 1, 3),
        ]

        for pattern, task_g, owner_g, deadline_g in patterns:
            for match in re.finditer(pattern, full_text, re.IGNORECASE):
                task = match.group(task_g)
                owner = match.group(owner_g) if owner_g is not None else "Unassigned"
                deadline = match.group(deadline_g) if deadline_g is not None else "ASAP"
                action_items.append({
                    "task": task.strip(),
                    "owner": owner.strip(),
                    "deadline": self.normalize_deadline(deadline.strip()),
                })

        # Keep instance state in sync so urgency detection sees the result.
        self.action_items = action_items
        return action_items

    def detect_urgent_action_items(self):
        """Return items whose task mentions "urgent" or whose deadline is ASAP."""
        return [
            item for item in self.action_items
            if "urgent" in item['task'].lower() or "asap" in item['deadline'].lower()
        ]

    def extract_decisions(self):
        """Scan the transcript for decision phrases and return them."""
        full_text = " ".join(self.transcript_chunks)
        decisions = []

        # Pattern matching for decisions; each captures up to sentence end.
        patterns = [
            r"\bdecided to\b (.*?)[\.\n]",
            r"\bagreed that\b (.*?)[\.\n]",
            r"\bconsensus is\b (.*?)[\.\n]",
            r"\bresolution\b: (.*?)[\.\n]",
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, full_text, re.IGNORECASE):
                decisions.append(match.group(1).strip())

        self.decisions = decisions
        return decisions

    def normalize_deadline(self, deadline_str):
        """Convert relative deadline phrases to YYYY-MM-DD.

        Unrecognized strings (e.g. "March 5") are passed through unchanged.
        """
        today = datetime.now()
        lower_str = deadline_str.lower()

        # "eod" (end of day) and "today" both resolve to today's date.
        if "today" in lower_str or "eod" in lower_str:
            return today.strftime("%Y-%m-%d")
        if "tomorrow" in lower_str:
            return (today + timedelta(days=1)).strftime("%Y-%m-%d")
        if "next week" in lower_str:
            return (today + timedelta(weeks=1)).strftime("%Y-%m-%d")
        if "eow" in lower_str:
            # End of week = next Friday (weekday(): 0 = Monday, 4 = Friday).
            days_ahead = 4 - today.weekday()
            if days_ahead <= 0:  # already Friday or the weekend
                days_ahead += 7
            return (today + timedelta(days=days_ahead)).strftime("%Y-%m-%d")

        return deadline_str
 
1
+ # analyzer.py (Meeting Analysis)
 
2
  import re
3
  from datetime import datetime, timedelta
 
 
 
 
4
 
5
class MeetingAnalyzer:
    """Accumulate meeting-transcript chunks and provide a lightweight,
    regex-based summary, action-item and decision extraction (no ML model).
    """

    def __init__(self):
        self.transcript_chunks = []   # raw text chunks, in arrival order
        self.action_items = []        # filled incrementally by process_chunk()
        self.decisions = []           # reserved for decision tracking

    def process_chunk(self, text_chunk):
        """Store a transcript chunk and flag obvious action items.

        Any chunk mentioning "action item" or "todo" is recorded verbatim
        as an unassigned, ASAP action item.
        """
        self.transcript_chunks.append(text_chunk)

        # Simple action item detection
        if "action item" in text_chunk.lower() or "todo" in text_chunk.lower():
            self.action_items.append({
                "task": text_chunk,
                "owner": "Unassigned",
                "deadline": "ASAP"
            })

    def generate_summary(self):
        """Return the first three sentences of the transcript as a summary.

        Returns a placeholder while the transcript is under 50 words.
        """
        full_text = " ".join(self.transcript_chunks)

        # If text is too short, skip summarization
        if len(full_text.split()) < 50:
            return "Meeting in progress... summary will appear here"

        # Simple summary - just the first few sentences
        sentences = full_text.split('. ')
        summary = ". ".join(sentences[:3])
        # Fix: only append a period when one isn't already there, so a
        # transcript ending in "." isn't summarized as "...beta.."
        return summary if summary.endswith(".") else summary + "."

    def extract_action_items(self):
        """Extract action items from the transcript via regex.

        Each pattern captures up to the end of the sentence. The original
        patterns ended in a lazy `(.*?)` at pattern end, which always
        matched the empty string and produced empty task content.
        """
        action_items = []
        full_text = " ".join(self.transcript_chunks)

        # Look for assignment patterns; group 1 is the task text.
        patterns = [
            r"\bassign\b[^.\n]*?\bto\b\s+([^.\n]+)",
            r"\baction item\b:\s*([^.\n]+)",
            r"\btodo\b:\s*([^.\n]+)",
            r"\bplease\b\s+([^.\n]+)",
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, full_text, re.IGNORECASE):
                action_items.append({
                    "task": match.group(1).strip(),
                    "owner": "Unassigned",
                    "deadline": "ASAP"
                })

        return action_items

    def detect_urgent_action_items(self):
        """Return recorded action items whose task text looks urgent."""
        urgent_items = []
        for item in self.action_items:
            if "urgent" in item['task'].lower() or "asap" in item['task'].lower():
                urgent_items.append(item)
        return urgent_items

    def extract_decisions(self):
        """Extract decision statements from the transcript via regex."""
        decisions = []

        # Look for decision patterns; each captures up to sentence end.
        patterns = [
            r"\bdecided to\b (.*?)[\.\n]",
            r"\bagreed that\b (.*?)[\.\n]",
            r"\bconsensus is\b (.*?)[\.\n]"
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, " ".join(self.transcript_chunks), re.IGNORECASE):
                decisions.append(match.group(1).strip())

        return decisions