|
import os |
|
import textwrap |
|
import scipy |
|
import numpy as np |
|
import av |
|
from pathlib import Path |
|
|
|
|
|
def clear_screen():
    """Wipe the terminal by running the platform's own clear command."""
    # Windows (os.name == "nt") uses "cls"; every other platform gets "clear".
    if os.name == "nt":
        command = "cls"
    else:
        command = "clear"
    os.system(command)
|
|
|
|
|
def print_transcript(text):
    """Print the transcript wrapped to lines of at most 60 characters.

    ``text`` is concatenated with no separator before wrapping, so it may be
    a single string or an iterable of string fragments.
    """
    joined = "".join(text)
    for wrapped_line in textwrap.wrap(joined, width=60):
        print(wrapped_line)
|
|
|
|
|
def format_time(s):
    """Convert a duration in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        s: Time offset in seconds; anything ``float()`` accepts.

    Returns:
        str: The timestamp, rounded to the nearest millisecond. Negative
        inputs are clamped to ``00:00:00,000`` because SRT has no notation
        for negative times.
    """
    # Round once to whole milliseconds and stay in integer arithmetic from
    # here on. Truncating the fractional part separately loses a millisecond
    # to float error (1.001 - 1 is 0.000999..., which truncates to 0 ms).
    total_ms = max(0, round(float(s) * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
|
|
|
|
|
def create_srt_file(segments, resampled_file):
    """Write ``segments`` to ``resampled_file`` as numbered SRT cues.

    Args:
        segments: Iterable of mappings with 'start' and 'end' (values that
            ``float()`` accepts, in seconds) and 'text' entries.
        resampled_file: Path of the file to write. It is opened in text
            mode with UTF-8 encoding and any existing content is replaced.
    """
    with open(resampled_file, 'w', encoding='utf-8') as srt_file:
        # SRT cue numbers are 1-based.
        for index, segment in enumerate(segments, start=1):
            begin = format_time(float(segment['start']))
            finish = format_time(float(segment['end']))
            caption = segment['text']
            # One cue: number, time range, caption, then a blank separator line.
            srt_file.write(f"{index}\n{begin} --> {finish}\n{caption}\n\n")
|
|
|
|
|
def _write_resampled(resampler, frame, output_stream, output_container):
    """Resample ``frame`` (``None`` flushes the resampler) and mux the result."""
    for resampled_frame in resampler.resample(frame) or []:
        # A flush can hand back a ``None`` placeholder when the resampler had
        # nothing to convert; encoding it would prematurely flush the encoder.
        if resampled_frame is None:
            continue
        for packet in output_stream.encode(resampled_frame):
            output_container.mux(packet)


def resample(file: str, sr: int = 16000):
    """
    Resample an audio file to mono 16-bit PCM at the requested sample rate.

    The result is written as ``<stem>_resampled.wav`` in the current working
    directory, where ``<stem>`` is the input file name without its directory
    and extension.

    Args:
        file (str): The audio file to open
        sr (int): The target sample rate in Hz (defaults to 16000)

    Returns:
        resampled_file (str): Path of the resampled WAV file

    Raises:
        StopIteration: If the input contains no audio stream. This is raised
            before the output file is created.
    """
    resampled_file = Path(file).stem + "_resampled.wav"

    resampler = av.AudioResampler(
        format='s16',
        layout='mono',
        rate=sr,
    )

    # Context managers close both containers even when decoding or encoding
    # raises, so no file handles leak and the output is always finalized.
    with av.open(file) as container:
        stream = next(s for s in container.streams if s.type == 'audio')

        with av.open(resampled_file, mode='w') as output_container:
            output_stream = output_container.add_stream('pcm_s16le', rate=sr)
            output_stream.layout = 'mono'

            for frame in container.decode(stream):
                # Drop the source timestamp; presumably this lets the
                # resampler/encoder assign continuous timestamps itself.
                frame.pts = None
                _write_resampled(resampler, frame, output_stream, output_container)

            # Flush samples still buffered inside the resampler, then the
            # encoder, so the tail of the audio is not lost.
            _write_resampled(resampler, None, output_stream, output_container)
            for packet in output_stream.encode(None):
                output_container.mux(packet)

    return resampled_file
|
|