Spaces:

chaowenguo
/

aegwe4

Running

App Files Files Community

aegwe4 / app /services /subtitle_merger.py

chaowenguo

Upload 121 files

3b13b0e verified 2 months ago

raw

history blame contribute delete

8.74 kB

	#!/usr/bin/env python
	# -*- coding: UTF-8 -*-

	'''
	@Project: NarratoAI
	@File : subtitle_merger
	@Author : viccy
	@Date : 2025/5/6 下午4:00
	'''

	import re
	import os
	from datetime import datetime, timedelta


	def parse_time(time_str):
	    """Convert an SRT timestamp (``HH:MM:SS,mmm``) into a ``timedelta``.

	    Args:
	        time_str: Timestamp text with ``:`` between hours, minutes and
	            seconds, and ``,`` before the milliseconds.

	    Returns:
	        The ``timedelta`` built from the four integer fields.
	    """
	    hh, mm, tail = time_str.split(':')
	    ss, ms = tail.split(',')
	    return timedelta(
	        hours=int(hh),
	        minutes=int(mm),
	        seconds=int(ss),
	        milliseconds=int(ms),
	    )


	def format_time(td):
	    """Render a ``timedelta`` as an SRT timestamp string (``HH:MM:SS,mmm``).

	    Args:
	        td: The ``timedelta`` to format.

	    Returns:
	        A string with zero-padded hours, minutes and seconds, followed by a
	        comma and three-digit milliseconds. Hours are not capped at 24.
	    """
	    whole_seconds = int(td.total_seconds())
	    hh, remainder = divmod(whole_seconds, 3600)
	    mm, ss = divmod(remainder, 60)
	    # Sub-millisecond precision is truncated, not rounded.
	    ms = td.microseconds // 1000
	    return f"{hh:02d}:{mm:02d}:{ss:02d},{ms:03d}"


	def parse_edited_time_range(time_range_str):
	    """
	    Extract the start and end times from an ``editedTimeRange`` string.

	    The expected format is ``HH:MM:SS-HH:MM:SS``.

	    Args:
	        time_range_str: Range text such as ``'00:00:26-00:00:40'``.

	    Returns:
	        A ``(start, end)`` tuple of ``timedelta`` objects, or ``(None, None)``
	        when the input is empty or does not match the expected format.
	    """
	    if not time_range_str:
	        return None, None

	    parts = time_range_str.split('-')
	    if len(parts) != 2:
	        return None, None

	    def to_timedelta(clock_text):
	        # Raises ValueError for a wrong field count or a non-integer field.
	        hours, minutes, seconds = map(int, clock_text.split(':'))
	        return timedelta(hours=hours, minutes=minutes, seconds=seconds)

	    try:
	        start_time = to_timedelta(parts[0])
	        end_time = to_timedelta(parts[1])
	    except ValueError:
	        # Report a malformed clock value the same way as every other
	        # malformed range, so callers only have to check for None.
	        return None, None

	    return start_time, end_time


	def _range_stamp(td):
	    """Render a timedelta as ``HH_MM_SS`` for use inside a file name."""
	    # total_seconds() keeps the day component, so a range of 24h or more
	    # does not wrap around the way ``td.seconds`` would.
	    hours, remainder = divmod(int(td.total_seconds()), 3600)
	    minutes, seconds = divmod(remainder, 60)
	    return f"{hours:02d}_{minutes:02d}_{seconds:02d}"


	def merge_subtitle_files(subtitle_items, output_file=None):
	    """
	    Merge several SRT subtitle files into a single SRT file.

	    Items are processed in order of the start of their ``editedTimeRange``.
	    Every cue of an item is shifted by that start time and the cues are
	    renumbered sequentially. Items without an existing subtitle file or
	    without a parsable ``editedTimeRange`` are skipped.

	    Args:
	        subtitle_items: List of dicts; each provides a ``subtitle`` file path
	            and an ``editedTimeRange`` string.
	        output_file: Path of the file to write. When ``None``, the file is
	            placed next to the first merged subtitle file and named
	            ``merged_subtitle_<first start>-<last end>.srt``.

	    Returns:
	        The path of the merged subtitle file.

	    Raises:
	        ValueError: If ``output_file`` is ``None`` and no item could be
	            merged, so no output path can be derived.
	    """
	    # Sort by the start of editedTimeRange; unparsable ranges sort as zero.
	    sorted_items = sorted(
	        subtitle_items,
	        key=lambda x: parse_edited_time_range(x.get('editedTimeRange', ''))[0] or timedelta(),
	    )

	    merged_subtitles = []
	    # (subtitle path, range start, range end) of every item actually merged;
	    # the automatic output name is derived from these, never from skipped items.
	    merged_sources = []
	    subtitle_index = 1

	    for item in sorted_items:
	        subtitle_path = item.get('subtitle')
	        if not subtitle_path or not os.path.exists(subtitle_path):
	            continue

	        # The start of editedTimeRange is the offset applied to every cue.
	        offset_time, range_end = parse_edited_time_range(item.get('editedTimeRange', ''))
	        if offset_time is None:
	            print(f"警告: 无法从项目 {item.get('_id')} 的editedTimeRange中提取时间范围，跳过该项")
	            continue

	        merged_sources.append((subtitle_path, offset_time, range_end))

	        with open(subtitle_path, 'r', encoding='utf-8') as file:
	            content = file.read()

	        # Cues are separated by one or more blank lines.
	        for block in re.split(r'\n\s*\n', content.strip()):
	            lines = block.strip().split('\n')
	            if len(lines) < 3:  # need index, time line and at least one text line
	                continue

	            time_line = lines[1]
	            time_parts = time_line.split(' --> ')
	            if len(time_parts) != 2:
	                continue

	            try:
	                start_time = parse_time(time_parts[0])
	                end_time = parse_time(time_parts[1])
	            except ValueError:
	                # One malformed cue should not abort the whole merge.
	                print(f"警告: 无法解析时间轴 '{time_line}'，跳过该字幕块")
	                continue

	            # Shift the cue into the merged timeline and renumber it.
	            adjusted_time_line = (
	                f"{format_time(start_time + offset_time)} --> "
	                f"{format_time(end_time + offset_time)}"
	            )
	            new_block = [str(subtitle_index), adjusted_time_line, *lines[2:]]

	            merged_subtitles.append('\n'.join(new_block))
	            subtitle_index += 1

	    # Derive the output path when the caller did not supply one.
	    if output_file is None:
	        if not merged_sources:
	            raise ValueError("没有可合并的字幕文件，无法自动生成输出文件路径")

	        first_path, first_start, _ = merged_sources[0]
	        last_end = merged_sources[-1][2]
	        output_file = os.path.join(
	            os.path.dirname(first_path),
	            f"merged_subtitle_{_range_stamp(first_start)}-{_range_stamp(last_end)}.srt",
	        )

	    # Join the cues with a blank line between them and write the result.
	    merged_content = '\n\n'.join(merged_subtitles)
	    with open(output_file, 'w', encoding='utf-8') as file:
	        file.write(merged_content)

	    return output_file


	if __name__ == '__main__':
	    # Sample items for a manual run; the audio/subtitle entries are
	    # hard-coded absolute paths.
	    demo_items = [
	        {
	            'picture': '【解说】好的，各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！',
	            'timestamp': '00:00:00-00:01:15',
	            'narration': '好的各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！上集片尾那个巨大的悬念，这一集就立刻揭晓了！范闲假死归来，他面临的第一个，也是最大的难关，就是如何面对他最敬爱的，同时也是最可怕的那个人——庆帝！',
	            'OST': 0,
	            '_id': 1,
	            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
	            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
	            'sourceTimeRange': '00:00:00-00:00:26',
	            'duration': 26,
	            'editedTimeRange': '00:00:00-00:00:26',
	        },
	        {
	            'picture': '【解说】上一集我们看到，范闲在北齐遭遇了惊天变故，生死不明！',
	            'timestamp': '00:01:15-00:04:40',
	            'narration': '但我们都知道，他绝不可能就这么轻易退场！第二集一开场，范闲就已经秘密回到了京都。他的生死传闻，可不像我们想象中那样只是小范围流传，而是…',
	            'OST': 0,
	            '_id': 2,
	            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
	            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
	            'sourceTimeRange': '00:01:15-00:01:29',
	            'duration': 14,
	            'editedTimeRange': '00:00:26-00:00:40',
	        },
	        {
	            'picture': '【解说】"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。',
	            'timestamp': '00:04:58-00:05:45',
	            'narration': '"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。但范闲是谁啊？他偏要反其道而行之！他竟然决定，直接去见庆帝！冒着天大的风险，用"假死"这个事实去赌庆帝的态度！',
	            'OST': 0,
	            '_id': 4,
	            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
	            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
	            'sourceTimeRange': '00:04:58-00:05:20',
	            'duration': 22,
	            'editedTimeRange': '00:00:57-00:01:19',
	        },
	        {
	            'picture': '【解说】但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
	            'timestamp': '00:05:45-00:06:00',
	            'narration': '但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
	            'OST': 0,
	            '_id': 5,
	            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
	            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
	            'sourceTimeRange': '00:05:45-00:05:53',
	            'duration': 8,
	            'editedTimeRange': '00:01:19-00:01:27',
	        },
	    ]

	    # Merge with an auto-generated output path and report where it went.
	    merged_path = merge_subtitle_files(demo_items)
	    print(f"字幕文件已合并至: {merged_path}")