SilasKieser committed on
Commit
5203247
·
1 Parent(s): cc68f3b

my version of timestamped text

Browse files
Files changed (1) hide show
  1. src/whisper/timestaped_words.py +95 -0
src/whisper/timestaped_words.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ from collections import namedtuple
4
+
5
class TimeStampedSegment:
    """A piece of text together with its start and end timestamps.

    A segment behaves like a read-only ``(start, end, text)`` triple for
    indexing, iteration and unpacking, while also exposing the three values
    as the mutable attributes ``start``, ``end`` and ``text``.
    """

    def __init__(self, start=None, end=None, text=""):
        """Store the timestamps and the text.

        Args:
            start: Start timestamp; ``None`` by default.
            end: End timestamp; ``None`` by default.
            text: Text covered by the segment; empty by default.
        """
        self.start = start
        self.end = end
        self.text = text

    def __getitem__(self, key):
        """Return ``start``, ``end`` or ``text`` for index 0, 1 or 2.

        Negative indices count from the end, as for a tuple.

        Raises:
            NotImplementedError: If ``key`` is a slice.
            IndexError: If ``key`` is an integer outside ``-3..2``.
            TypeError: If ``key`` is not usable as a sequence index.
        """
        if isinstance(key, slice):
            raise NotImplementedError('Slicing not supported')
        # Delegating to a real tuple makes out-of-range indices raise
        # IndexError. Without that, Python's fallback iteration protocol
        # (which calls __getitem__ with 0, 1, 2, ... until IndexError) would
        # never stop, so unpacking and tuple(segment) could not work.
        return (self.start, self.end, self.text)[key]

    def __str__(self):
        """Return ``'<start> - <end>: <text>'``."""
        return f'{self.start} - {self.end}: {self.text}'

    def __repr__(self):
        """Return the same representation as ``__str__``."""
        return self.__str__()

    def shift(self, shift):
        """Return a new segment with ``shift`` added to ``start`` and ``end``.

        The text is carried over unchanged and this segment is not modified.
        """
        return TimeStampedSegment(self.start + shift, self.end + shift, self.text)

    def append_text(self, text):
        """Append ``text`` to this segment's text in place."""
        self.text += text

    def __eq__(self, other):
        """Compare ``start``, ``end`` and ``text`` with those of ``other``.

        Returns ``NotImplemented`` when ``other`` lacks one of these
        attributes, so that comparing with an unrelated object (for example
        ``None``) evaluates to ``False`` instead of raising AttributeError.
        """
        try:
            return (
                self.start == other.start
                and self.end == other.end
                and self.text == other.text
            )
        except AttributeError:
            return NotImplemented

    def __add__(self, other):
        """Shift the segment by a number, or extend its text with a string.

        Args:
            other: An ``int``/``float`` time shift, or a ``str`` suffix.

        Returns:
            A new ``TimeStampedSegment``; this segment is not modified.

        Raises:
            TypeError: If ``other`` is neither a number nor a string.
        """
        if isinstance(other, (int, float)):
            return self.shift(other)
        elif isinstance(other, str):
            return TimeStampedSegment(self.start, self.end, self.text + other)
        else:
            raise TypeError(f"unsupported operand type(s) for +: '{type(self).__name__}' and '{type(other).__name__}'")
43
+
44
+
45
+
46
+
47
class TimeStampedText(list):
    """A list of timestamped segments with column-wise accessors."""

    def __init__(self, time_stamped_segments: list[TimeStampedSegment]):
        """Fill the list with the given segments."""
        super().__init__(time_stamped_segments)
        # Initialised to 0; not read by any method of this class.
        self._index = 0

    def words(self):
        """Return the ``text`` of every segment, in list order."""
        return [item.text for item in self]

    def starts(self):
        """Return the ``start`` of every segment, in list order."""
        return [item.start for item in self]

    def ends(self):
        """Return the ``end`` of every segment, in list order."""
        return [item.end for item in self]

    def concatenate(self, sep:str, offset=0)->TimeStampedSegment:
        """
        Merge all segments into one segment spanning the whole list.

        The texts are joined with ``sep``; the merged segment starts where
        the first segment starts and ends where the last segment ends, with
        ``offset`` added to both timestamps.

        Args:
            sep (str): Separator placed between the segment texts.
            offset (float, optional): Value added to the start and end
                timestamps. Defaults to 0.

        Returns:
            TimeStampedSegment: A new segment holding
                - start: first segment's start + offset
                - end: last segment's end + offset
                - text: all segment texts joined by ``sep``

        Examples:
            >>> text = TimeStampedText([TimeStampedSegment(1.0, 2.0, "hello"),
            ...                         TimeStampedSegment(2.1, 3.0, "world!")])
            >>> print(text.concatenate(" "))
            1.0 - 3.0: hello world!

        Notes:
            An empty list yields a default ``TimeStampedSegment()`` and
            ``offset`` is not applied.
        """
        if not self:
            return TimeStampedSegment()

        joined = sep.join(self.words())

        # Index 0 / 1 of a segment are its start / end timestamps.
        first, last = self[0], self[-1]
        begin = offset + first[0]
        finish = offset + last[1]
        return TimeStampedSegment(begin, finish, joined)
93
+
94
+
95
+