Spaces:
Runtime error
Update output_parser.py
Browse files
- output_parser.py +2 -50
output_parser.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1 |
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
|
2 |
from langchain.prompts import PromptTemplate
|
3 |
-
from pydantic import BaseModel
|
4 |
-
from typing import
|
5 |
-
import re
|
6 |
|
7 |
class AttachmentStyle(BaseModel):
|
8 |
speaker: str
|
@@ -101,50 +100,3 @@ def parse_analysis_output(output: str, analysis_type: str) -> Dict[str, BaseMode
|
|
101 |
return {parsed['speaker']: PersonalityDisorder(**parsed)}
|
102 |
else:
|
103 |
raise ValueError(f"Unknown analysis type: {analysis_type}")
|
104 |
-
|
105 |
-
def parse_srt_output(srt_content: str) -> Dict[str, Dict[str, float]]:
|
106 |
-
speakers = {}
|
107 |
-
current_speaker = None
|
108 |
-
utterance_count = 0
|
109 |
-
total_words = 0
|
110 |
-
|
111 |
-
for line in srt_content.split('\n'):
|
112 |
-
if line.startswith("Speaker"):
|
113 |
-
current_speaker = line.strip()
|
114 |
-
if current_speaker not in speakers:
|
115 |
-
speakers[current_speaker] = {"total_duration": 0, "utterance_count": 0, "total_words": 0}
|
116 |
-
elif line.startswith(" time:"):
|
117 |
-
time_match = re.search(r'\((.+?) --> (.+?)\)', line)
|
118 |
-
if time_match and current_speaker:
|
119 |
-
start_time = time_to_seconds(time_match.group(1))
|
120 |
-
end_time = time_to_seconds(time_match.group(2))
|
121 |
-
duration = end_time - start_time
|
122 |
-
speakers[current_speaker]["total_duration"] += duration
|
123 |
-
speakers[current_speaker]["utterance_count"] += 1
|
124 |
-
elif line.startswith(" text:"):
|
125 |
-
text = line.replace(" text:", "").strip()
|
126 |
-
words = len(text.split())
|
127 |
-
speakers[current_speaker]["total_words"] += words
|
128 |
-
|
129 |
-
for speaker in speakers:
|
130 |
-
speakers[speaker]["average_utterance_length"] = (
|
131 |
-
speakers[speaker]["total_words"] / speakers[speaker]["utterance_count"]
|
132 |
-
if speakers[speaker]["utterance_count"] > 0
|
133 |
-
else 0
|
134 |
-
)
|
135 |
-
|
136 |
-
return speakers
|
137 |
-
|
138 |
-
def time_to_seconds(time_str: str) -> float:
|
139 |
-
h, m, s = time_str.split(':')
|
140 |
-
return int(h) * 3600 + int(m) * 60 + float(s)
|
141 |
-
|
142 |
-
def get_speaker_data_for_charts(parsed_output: Dict[str, Dict[str, float]]) -> Dict[str, Dict[str, float]]:
|
143 |
-
return {
|
144 |
-
speaker: {
|
145 |
-
"total_duration": data["total_duration"] / 60, # Convert to minutes
|
146 |
-
"utterance_count": data["utterance_count"],
|
147 |
-
"average_utterance_length": data["average_utterance_length"]
|
148 |
-
}
|
149 |
-
for speaker, data in parsed_output.items()
|
150 |
-
}
|
|
|
1 |
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
|
2 |
from langchain.prompts import PromptTemplate
|
3 |
+
from pydantic import BaseModel
|
4 |
+
from typing import Dict
|
|
|
5 |
|
6 |
class AttachmentStyle(BaseModel):
|
7 |
speaker: str
|
|
|
100 |
return {parsed['speaker']: PersonalityDisorder(**parsed)}
|
101 |
else:
|
102 |
raise ValueError(f"Unknown analysis type: {analysis_type}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|