reab5555 committed on
Commit
8df2e34
·
verified ·
1 Parent(s): 76c5624

Update output_parser.py

Browse files
Files changed (1) hide show
  1. output_parser.py +2 -50
output_parser.py CHANGED
@@ -1,8 +1,7 @@
1
  from langchain.output_parsers import StructuredOutputParser, ResponseSchema
2
  from langchain.prompts import PromptTemplate
3
- from pydantic import BaseModel, Field
4
- from typing import List, Dict
5
- import re
6
 
7
  class AttachmentStyle(BaseModel):
8
  speaker: str
@@ -101,50 +100,3 @@ def parse_analysis_output(output: str, analysis_type: str) -> Dict[str, BaseMode
101
  return {parsed['speaker']: PersonalityDisorder(**parsed)}
102
  else:
103
  raise ValueError(f"Unknown analysis type: {analysis_type}")
104
-
105
- def parse_srt_output(srt_content: str) -> Dict[str, Dict[str, float]]:
106
- speakers = {}
107
- current_speaker = None
108
- utterance_count = 0
109
- total_words = 0
110
-
111
- for line in srt_content.split('\n'):
112
- if line.startswith("Speaker"):
113
- current_speaker = line.strip()
114
- if current_speaker not in speakers:
115
- speakers[current_speaker] = {"total_duration": 0, "utterance_count": 0, "total_words": 0}
116
- elif line.startswith(" time:"):
117
- time_match = re.search(r'\((.+?) --> (.+?)\)', line)
118
- if time_match and current_speaker:
119
- start_time = time_to_seconds(time_match.group(1))
120
- end_time = time_to_seconds(time_match.group(2))
121
- duration = end_time - start_time
122
- speakers[current_speaker]["total_duration"] += duration
123
- speakers[current_speaker]["utterance_count"] += 1
124
- elif line.startswith(" text:"):
125
- text = line.replace(" text:", "").strip()
126
- words = len(text.split())
127
- speakers[current_speaker]["total_words"] += words
128
-
129
- for speaker in speakers:
130
- speakers[speaker]["average_utterance_length"] = (
131
- speakers[speaker]["total_words"] / speakers[speaker]["utterance_count"]
132
- if speakers[speaker]["utterance_count"] > 0
133
- else 0
134
- )
135
-
136
- return speakers
137
-
138
- def time_to_seconds(time_str: str) -> float:
139
- h, m, s = time_str.split(':')
140
- return int(h) * 3600 + int(m) * 60 + float(s)
141
-
142
- def get_speaker_data_for_charts(parsed_output: Dict[str, Dict[str, float]]) -> Dict[str, Dict[str, float]]:
143
- return {
144
- speaker: {
145
- "total_duration": data["total_duration"] / 60, # Convert to minutes
146
- "utterance_count": data["utterance_count"],
147
- "average_utterance_length": data["average_utterance_length"]
148
- }
149
- for speaker, data in parsed_output.items()
150
- }
 
1
  from langchain.output_parsers import StructuredOutputParser, ResponseSchema
2
  from langchain.prompts import PromptTemplate
3
+ from pydantic import BaseModel
4
+ from typing import Dict
 
5
 
6
  class AttachmentStyle(BaseModel):
7
  speaker: str
 
100
  return {parsed['speaker']: PersonalityDisorder(**parsed)}
101
  else:
102
  raise ValueError(f"Unknown analysis type: {analysis_type}")