# nuernie
# initial commit
# 7222c68
import os
import textwrap
import scipy
import numpy as np
import av
from pathlib import Path
def clear_screen():
    """Wipe the terminal by running the platform's clear command."""
    # Windows shells use `cls`; every other platform is given `clear`.
    if os.name == "nt":
        command = "cls"
    else:
        command = "clear"
    os.system(command)
def print_transcript(text):
    """Print the transcript wrapped to a width of 60 characters.

    Args:
        text: Iterable of string fragments; they are concatenated without
            a separator before being wrapped.
    """
    joined = "".join(text)
    for wrapped_line in textwrap.wrap(joined, width=60):
        print(wrapped_line)
def format_time(s):
    """Convert seconds (float) to SRT time format.

    Args:
        s: Duration in seconds (int or float). Expected to be non-negative;
            negative values are not handled specially.

    Returns:
        str: Timestamp formatted as ``HH:MM:SS,mmm``.
    """
    # Round once to whole milliseconds and do the rest in integer arithmetic.
    # Truncating the fractional part instead (int((s - int(s)) * 1000)) loses
    # a millisecond to float error, e.g. 1.001 -> ",000" and 2.3 -> ",299".
    total_ms = int(round(s * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
def create_srt_file(segments, resampled_file):
    """Write the given segments to a UTF-8 encoded SRT subtitle file.

    Args:
        segments: Iterable of mappings providing 'start', 'end' (values
            convertible to float, in seconds) and 'text' entries.
        resampled_file: Path of the file to write; despite the parameter
            name, this is the SRT output path and it is overwritten.
    """
    with open(resampled_file, 'w', encoding='utf-8') as srt_file:
        # SRT cues are numbered consecutively starting from 1.
        for index, cue in enumerate(segments, start=1):
            begins = format_time(float(cue['start']))
            finishes = format_time(float(cue['end']))
            caption = cue['text']
            srt_file.writelines((
                f"{index}\n",
                f"{begins} --> {finishes}\n",
                f"{caption}\n\n",
            ))
def resample(file: str, sr: int = 16000):
    """
    Resample the first audio stream of a file to mono 16-bit PCM WAV.

    The output is written to the current working directory as
    ``<input stem>_resampled.wav``.

    Args:
        file (str): The audio file to open
        sr (int): The target sample rate in Hz (defaults to 16000)
    Returns:
        resampled_file (str): The resampled audio file
    Raises:
        StopIteration: If the input contains no audio stream.
    """
    resampled_file = Path(file).stem + "_resampled.wav"
    resampler = av.AudioResampler(
        format='s16',
        layout='mono',
        rate=sr,
    )

    # Context managers ensure both containers are closed even when decoding
    # or encoding fails part-way through.
    with av.open(file) as container:
        # Looked up before the output is opened so that an input without
        # audio fails without leaving an empty output file behind.
        stream = next(s for s in container.streams if s.type == 'audio')

        with av.open(resampled_file, mode='w') as output_container:
            output_stream = output_container.add_stream('pcm_s16le', rate=sr)
            output_stream.layout = 'mono'

            for frame in container.decode(stream):
                # Drop the source timestamp before resampling/encoding.
                frame.pts = None
                resampled_frames = resampler.resample(frame)
                if resampled_frames is not None:
                    for resampled_frame in resampled_frames:
                        for packet in output_stream.encode(resampled_frame):
                            output_container.mux(packet)

            # Flush samples still buffered inside the resampler so the tail
            # of the audio is not dropped. Entries may be None (e.g. when
            # the resampler passes frames through) and must not reach the
            # encoder, where None means "flush".
            for resampled_frame in resampler.resample(None) or ():
                if resampled_frame is None:
                    continue
                for packet in output_stream.encode(resampled_frame):
                    output_container.mux(packet)

            # Flush the encoder itself.
            for packet in output_stream.encode(None):
                output_container.mux(packet)

    return resampled_file