reab5555 committed on
Commit
4fad525
·
verified ·
1 Parent(s): 21dcf63

Update output_parser.py

Browse files
Files changed (1) hide show
  1. output_parser.py +34 -16
output_parser.py CHANGED
@@ -1,6 +1,40 @@
1
  import re
2
  from collections import defaultdict
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  def parse_srt_output(srt_content):
5
  speakers = defaultdict(lambda: {"utterances": [], "total_duration": 0})
6
  current_speaker = None
@@ -25,17 +59,6 @@ def parse_srt_output(srt_content):
25
  speakers[current_speaker]["utterances"].append(current_utterance.copy())
26
  current_utterance = {"start": "", "end": "", "text": ""}
27
 
28
- # Print the parsed output for debugging
29
- print("Parsed SRT Output:")
30
- for speaker, data in speakers.items():
31
- print(f"{speaker}:")
32
- print(f" Total duration: {data['total_duration']}")
33
- print(f" Utterances:")
34
- for utterance in data['utterances'][:3]: # Print first 3 utterances for brevity
35
- print(f" {utterance['start']} - {utterance['end']}: {utterance['text']}")
36
- print(f" Total utterances: {len(data['utterances'])}")
37
- print()
38
-
39
  return speakers
40
 
41
  def get_speaker_data_for_charts(parsed_output):
@@ -46,9 +69,4 @@ def get_speaker_data_for_charts(parsed_output):
46
  "utterance_count": len(data["utterances"]),
47
  "average_utterance_length": sum(len(u["text"].split()) for u in data["utterances"]) / len(data["utterances"]) if data["utterances"] else 0
48
  }
49
-
50
- # Print the data for charts
51
- print("Data for Charts:")
52
- print(speaker_data)
53
-
54
  return speaker_data
 
1
  import re
2
  from collections import defaultdict
3
 
4
def parse_analysis_output(text):
    """Parse a per-speaker analysis report into a nested dictionary.

    The report is read line by line:

    * A line starting with ``"Speaker"`` opens a new speaker section; the
      whole stripped line is used as the speaker key.
    * A line starting with a run of dashes closes the current section.
    * ``"Key: value"`` lines inside a section are stored under ``Key``;
      the value is converted to ``float`` when possible, otherwise it is
      kept as a string.
    * ``"Explanation: ..."`` lines, plus any following free-text lines
      without a colon, are joined with single spaces and stored under the
      lowercase key ``"explanation"``.

    Args:
        text: The raw report text. ``None`` or an empty string yields an
            empty result.

    Returns:
        A dict mapping each speaker header to a dict of its parsed fields.
    """
    speakers_data = {}
    if not text:
        return speakers_data

    current_speaker = None
    explanation_parts = []

    def flush_explanation():
        # Attach the collected explanation to the speaker it was written for,
        # then start a fresh buffer for the next section.
        if current_speaker and explanation_parts:
            speakers_data[current_speaker]["explanation"] = (
                " ".join(explanation_parts).strip()
            )
        explanation_parts.clear()

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        if line.startswith("-----------------------"):
            flush_explanation()
            current_speaker = None
            continue

        if line.startswith("Speaker"):
            # Flush before switching speakers so that a missing separator
            # does not hand this explanation to the next speaker.
            flush_explanation()
            current_speaker = line
            speakers_data[current_speaker] = {}
        elif ':' in line and current_speaker:
            key, value = line.split(':', 1)
            key = key.strip()
            value = value.strip()
            if key.lower() == "explanation":
                explanation_parts.append(value)
            else:
                try:
                    speakers_data[current_speaker][key] = float(value)
                except ValueError:
                    # Non-numeric values are kept verbatim.
                    speakers_data[current_speaker][key] = value
        elif line and current_speaker and not line.startswith("Explanation"):
            # Free-text continuation of the explanation.
            explanation_parts.append(line)

    # The last section may not be terminated by a separator line.
    flush_explanation()

    return speakers_data
37
+
38
  def parse_srt_output(srt_content):
39
  speakers = defaultdict(lambda: {"utterances": [], "total_duration": 0})
40
  current_speaker = None
 
59
  speakers[current_speaker]["utterances"].append(current_utterance.copy())
60
  current_utterance = {"start": "", "end": "", "text": ""}
61
 
 
 
 
 
 
 
 
 
 
 
 
62
  return speakers
63
 
64
  def get_speaker_data_for_charts(parsed_output):
 
69
  "utterance_count": len(data["utterances"]),
70
  "average_utterance_length": sum(len(u["text"].split()) for u in data["utterances"]) / len(data["utterances"]) if data["utterances"] else 0
71
  }
 
 
 
 
 
72
  return speaker_data