SilasKieser committed on
Commit
5c38e1b
·
1 Parent(s): 5203247

add doctest

Browse files
Files changed (1) hide show
  1. src/whisper/timestaped_words.py +71 -58
src/whisper/timestaped_words.py CHANGED
@@ -1,33 +1,55 @@
 
1
 
2
- import numpy as np
3
- from collections import namedtuple
 
4
 
5
- class TimeStampedSegment():
6
- def __init__(self, start=None, end=None, text=""):
 
 
 
 
7
  self.start = start
8
  self.end = end
9
  self.text = text
10
 
11
- def __getitem__(self, key):
12
- if key == 0:
13
- return self.start
14
- elif key == 1:
15
- return self.end
16
- elif key == 2:
17
- return self.text
18
- elif isinstance(key, slice):
19
- raise NotImplementedError('Slicing not supported')
20
-
21
  def __str__(self):
22
  return f'{self.start} - {self.end}: {self.text}'
23
 
24
  def __repr__(self):
25
  return self.__str__()
26
 
27
- def shift(self, shift):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  return TimeStampedSegment(self.start + shift, self.end + shift, self.text)
29
 
30
- def append_text(self, text):
 
 
 
 
 
 
 
 
 
 
 
 
31
  self.text += text
32
 
33
  def __eq__(self, other):
@@ -40,56 +62,47 @@ class TimeStampedSegment():
40
  return TimeStampedSegment(self.start, self.end, self.text + other)
41
  else:
42
  raise TypeError(f"unsupported operand type(s) for +: '{type(self).__name__}' and '{type(other).__name__}'")
43
-
44
-
45
-
46
 
47
- class TimeStampedText(list):
 
 
48
 
49
- def __init__(self, time_stamped_segments: list[TimeStampedSegment]):
50
- super().__init__(time_stamped_segments)
51
- self._index = 0
52
-
53
- def words(self):
54
- return [segment.text for segment in self]
55
- def starts(self):
56
- return [segment.start for segment in self]
57
-
58
- def ends(self):
59
- return [segment.end for segment in self]
60
-
61
 
62
- def concatenate(self, sep:str, offset=0)->TimeStampedSegment:
63
  """
64
- Concatenates the timestamped words or sentences into a single sequence with timing information.
65
- This method joins all words in the sequence using the specified separator and preserves
66
- the timing information from the first to the last word.
67
  Args:
68
- sep (str): Separator string used to join the words together
69
- offset (float, optional): Time offset to add to begin/end timestamps. Defaults to 0.
70
- Returns:
71
- TimeStampedSegment: A new segment containing:
72
- - Start time: First word's start time + offset
73
- - End time: Last word's end time + offset
74
- - Text: All words joined by separator
75
- Examples:
76
- >>> seg = TimeStampedSegment([(1.0, 2.0, "hello"), (2.1, 3.0, "world!")])
77
- >>> result = seg.concatenate(" ")
78
- >>> print(result)
79
- (1.0, 3.0, "hello world!")
80
- Notes:
81
- Returns an empty TimeStampedSegment if the current segment contains no words.
82
- """
83
-
84
 
85
- if len(self) == 0:
86
- return TimeStampedSegment()
 
 
 
 
 
 
87
 
88
- combined_text = sep.join(self.words())
 
89
 
90
- b = offset + self[0][0]
91
- e = offset + self[-1][1]
92
- return TimeStampedSegment(b, e, combined_text)
93
 
 
 
94
 
 
 
 
 
95
 
 
 
 
 
1
+ from typing import List
2
 
3
+ class TimeStampedSegment:
4
+ """
5
+ Represents a segment of text with start and end timestamps.
6
 
7
+ Attributes:
8
+ start (float): The start time of the segment.
9
+ end (float): The end time of the segment.
10
+ text (str): The text of the segment.
11
+ """
12
+ def __init__(self, start: float, end: float, text: str):
13
  self.start = start
14
  self.end = end
15
  self.text = text
16
 
 
 
 
 
 
 
 
 
 
 
17
  def __str__(self):
18
  return f'{self.start} - {self.end}: {self.text}'
19
 
20
  def __repr__(self):
21
  return self.__str__()
22
 
23
+ def shift(self, shift: float):
24
+ """
25
+ Shifts the segment by a given amount of time.
26
+
27
+ Args:
28
+ shift (float): The amount of time to shift the segment.
29
+
30
+ Returns:
31
+ TimeStampedSegment: A new segment shifted by the given amount of time.
32
+
33
+ Example:
34
+ >>> segment = TimeStampedSegment(0.0, 1.0, "Hello")
35
+ >>> segment.shift(1.0)
36
+ 1.0 - 2.0: Hello
37
+ """
38
  return TimeStampedSegment(self.start + shift, self.end + shift, self.text)
39
 
40
+ def append_text(self, text: str):
41
+ """
42
+ Appends text to the segment.
43
+
44
+ Args:
45
+ text (str): The text to append.
46
+
47
+ Example:
48
+ >>> segment = TimeStampedSegment(0.0, 1.0, "Hello")
49
+ >>> segment.append_text("!")
50
+ >>> segment
51
+ 0.0 - 1.0: Hello!
52
+ """
53
  self.text += text
54
 
55
  def __eq__(self, other):
 
62
  return TimeStampedSegment(self.start, self.end, self.text + other)
63
  else:
64
  raise TypeError(f"unsupported operand type(s) for +: '{type(self).__name__}' and '{type(other).__name__}'")
 
 
 
65
 
66
+ class TimeStampedText:
67
+ """
68
+ Represents a collection of TimeStampedSegment instances.
69
 
70
+ Attributes:
71
+ segments (List[TimeStampedSegment]): The list of segments.
72
+ """
73
+ def __init__(self):
74
+ self.segments: List[TimeStampedSegment] = []
 
 
 
 
 
 
 
75
 
76
+ def add_segment(self, segment: TimeStampedSegment):
77
  """
78
+ Adds a segment to the collection.
79
+
 
80
  Args:
81
+ segment (TimeStampedSegment): The segment to add.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ Example:
84
+ >>> tst = TimeStampedText()
85
+ >>> tst.add_segment(TimeStampedSegment(0.0, 1.0, "Hello"))
86
+ >>> tst.add_segment(TimeStampedSegment(1.0, 2.0, "world"))
87
+ >>> len(tst)
88
+ 2
89
+ """
90
+ self.segments.append(segment)
91
 
92
+ def __repr__(self):
93
+ return f"TimeStampedText(segments={self.segments})"
94
 
95
+ def __iter__(self):
96
+ return iter(self.segments)
 
97
 
98
+ def __getitem__(self, index):
99
+ return self.segments[index]
100
 
101
+ def __len__(self):
102
+ return len(self.segments)
103
+
104
+ # TODO: a function from_whisper_res()
105
 
106
+ if __name__ == "__main__":
107
+ import doctest
108
+ doctest.testmod(verbose=True)