Spaces:

chaowenguo
/

aegwe4

Running

App Files Files Community

aegwe4 / app /services /update_script.py

chaowenguo

Upload 121 files

3b13b0e verified 2 months ago

raw

history blame contribute delete

12.8 kB

	#!/usr/bin/env python
	# -- coding: UTF-8 --

	'''
	@Project: NarratoAI
	@File : update_script
	@Author : 小林同学
	@Date : 2025/5/6 下午11:00
	'''

	import re
	import os
	from typing import Dict, List, Any, Tuple, Union


	def extract_timestamp_from_video_path(video_path: str) -> str:
	"""
	从视频文件路径中提取时间戳

	Args:
	video_path: 视频文件路径

	Returns:
	提取出的时间戳，格式为 'HH:MM:SS-HH:MM:SS' 或 'HH:MM:SS,sss-HH:MM:SS,sss'
	"""
	# 使用正则表达式从文件名中提取时间戳
	filename = os.path.basename(video_path)

	# 匹配新格式: [email protected]
	match_new = re.search(r'vid_(\d{2})-(\d{2})-(\d{2})-(\d{3})@(\d{2})-(\d{2})-(\d{2})-(\d{3})\.mp4', filename)
	if match_new:
	# 提取并格式化时间戳（包含毫秒）
	start_h, start_m, start_s, start_ms = match_new.group(1), match_new.group(2), match_new.group(3), match_new.group(4)
	end_h, end_m, end_s, end_ms = match_new.group(5), match_new.group(6), match_new.group(7), match_new.group(8)
	return f"{start_h}:{start_m}:{start_s},{start_ms}-{end_h}:{end_m}:{end_s},{end_ms}"

	# 匹配旧格式: vid-00-00-00-00-00-00.mp4
	match_old = re.search(r'vid-(\d{2}-\d{2}-\d{2})-(\d{2}-\d{2}-\d{2})\.mp4', filename)
	if match_old:
	# 提取并格式化时间戳
	start_time = match_old.group(1).replace('-', ':')
	end_time = match_old.group(2).replace('-', ':')
	return f"{start_time}-{end_time}"

	return ""


	def calculate_duration(timestamp: str) -> float:
	"""
	计算时间戳范围的持续时间（秒）

	Args:
	timestamp: 格式为 'HH:MM:SS-HH:MM:SS' 或 'HH:MM:SS,sss-HH:MM:SS,sss' 的时间戳

	Returns:
	持续时间（秒）
	"""
	try:
	start_time, end_time = timestamp.split('-')

	# 处理毫秒部分
	if ',' in start_time:
	start_parts = start_time.split(',')
	start_time_parts = start_parts[0].split(':')
	start_ms = float('0.' + start_parts[1]) if len(start_parts) > 1 else 0
	start_h, start_m, start_s = map(int, start_time_parts)
	else:
	start_h, start_m, start_s = map(int, start_time.split(':'))
	start_ms = 0

	if ',' in end_time:
	end_parts = end_time.split(',')
	end_time_parts = end_parts[0].split(':')
	end_ms = float('0.' + end_parts[1]) if len(end_parts) > 1 else 0
	end_h, end_m, end_s = map(int, end_time_parts)
	else:
	end_h, end_m, end_s = map(int, end_time.split(':'))
	end_ms = 0

	# 转换为秒
	start_seconds = start_h * 3600 + start_m * 60 + start_s + start_ms
	end_seconds = end_h * 3600 + end_m * 60 + end_s + end_ms

	# 计算时间差（秒）
	return round(end_seconds - start_seconds, 2)
	except (ValueError, AttributeError):
	return 0.0


	def update_script_timestamps(
	script_list: List[Dict[str, Any]],
	video_result: Dict[Union[str, int], str],
	audio_result: Dict[Union[str, int], str] = None,
	subtitle_result: Dict[Union[str, int], str] = None,
	calculate_edited_timerange: bool = True
	) -> List[Dict[str, Any]]:
	"""
	根据 video_result 中的视频文件更新 script_list 中的时间戳，添加持续时间，
	并根据 audio_result 添加音频路径，根据 subtitle_result 添加字幕路径

	Args:
	script_list: 原始脚本列表
	video_result: 视频结果字典，键为原时间戳或_id，值为视频文件路径
	audio_result: 音频结果字典，键为原时间戳或_id，值为音频文件路径
	subtitle_result: 字幕结果字典，键为原时间戳或_id，值为字幕文件路径
	calculate_edited_timerange: 是否计算并添加成品视频中的时间范围

	Returns:
	更新后的脚本列表
	"""
	# 创建副本，避免修改原始数据
	updated_script = []

	# 建立ID和时间戳到视频路径和新时间戳的映射
	id_timestamp_mapping = {}
	for key, video_path in video_result.items():
	new_timestamp = extract_timestamp_from_video_path(video_path)
	if new_timestamp:
	id_timestamp_mapping[key] = {
	'new_timestamp': new_timestamp,
	'video_path': video_path
	}

	# 计算累积时长，用于生成成品视频中的时间范围
	accumulated_duration = 0.0

	# 更新脚本中的时间戳
	for item in script_list:
	item_copy = item.copy()
	item_id = item_copy.get('_id')
	orig_timestamp = item_copy.get('timestamp', '')

	# 初始化音频和字幕路径为空字符串
	item_copy['audio'] = ""
	item_copy['subtitle'] = ""
	item_copy['video'] = "" # 初始化视频路径为空字符串

	# 如果提供了音频结果字典且ID存在于音频结果中，直接使用对应的音频路径
	if audio_result:
	if item_id and item_id in audio_result:
	item_copy['audio'] = audio_result[item_id]
	elif orig_timestamp in audio_result:
	item_copy['audio'] = audio_result[orig_timestamp]

	# 如果提供了字幕结果字典且ID存在于字幕结果中，直接使用对应的字幕路径
	if subtitle_result:
	if item_id and item_id in subtitle_result:
	item_copy['subtitle'] = subtitle_result[item_id]
	elif orig_timestamp in subtitle_result:
	item_copy['subtitle'] = subtitle_result[orig_timestamp]

	# 添加视频路径
	if item_id and item_id in video_result:
	item_copy['video'] = video_result[item_id]
	elif orig_timestamp in video_result:
	item_copy['video'] = video_result[orig_timestamp]

	# 更新时间戳和计算持续时间
	current_duration = 0.0
	if item_id and item_id in id_timestamp_mapping:
	# 根据ID找到对应的新时间戳
	item_copy['sourceTimeRange'] = id_timestamp_mapping[item_id]['new_timestamp']
	current_duration = calculate_duration(item_copy['sourceTimeRange'])
	item_copy['duration'] = current_duration
	elif orig_timestamp in id_timestamp_mapping:
	# 根据原始时间戳找到对应的新时间戳
	item_copy['sourceTimeRange'] = id_timestamp_mapping[orig_timestamp]['new_timestamp']
	current_duration = calculate_duration(item_copy['sourceTimeRange'])
	item_copy['duration'] = current_duration
	elif orig_timestamp:
	# 对于未更新的时间戳，也计算并添加持续时间
	item_copy['sourceTimeRange'] = orig_timestamp
	current_duration = calculate_duration(orig_timestamp)
	item_copy['duration'] = current_duration

	# 计算片段在成品视频中的时间范围
	if calculate_edited_timerange and current_duration > 0:
	start_time_seconds = accumulated_duration
	end_time_seconds = accumulated_duration + current_duration

	# 将秒数转换为 HH:MM:SS 格式
	start_h = int(start_time_seconds // 3600)
	start_m = int((start_time_seconds % 3600) // 60)
	start_s = int(start_time_seconds % 60)

	end_h = int(end_time_seconds // 3600)
	end_m = int((end_time_seconds % 3600) // 60)
	end_s = int(end_time_seconds % 60)

	item_copy['editedTimeRange'] = f"{start_h:02d}:{start_m:02d}:{start_s:02d}-{end_h:02d}:{end_m:02d}:{end_s:02d}"

	# 更新累积时长
	accumulated_duration = end_time_seconds

	updated_script.append(item_copy)

	return updated_script


	if __name__ == '__main__':
	list_script = [
	{
	'picture': '【解说】好的，各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！',
	'timestamp': '00:00:00,001-00:01:15,001',
	'narration': '好的各位，欢迎回到我的频道！《庆余年 2》刚开播就给了我们一个王炸！范闲在北齐"死"了？这怎么可能！上集片尾那个巨大的悬念，这一集就立刻揭晓了！范闲假死归来，他面临的第一个，也是最大的难关，就是如何面对他最敬爱的，同时也是最可怕的那个人——庆帝！',
	'OST': 0,
	'_id': 1
	},
	{
	'picture': '【解说】上一集我们看到，范闲在北齐遭遇了惊天变故，生死不明！',
	'timestamp': '00:01:15,001-00:04:40,001',
	'narration': '但我们都知道，他绝不可能就这么轻易退场！第二集一开场，范闲就已经秘密回到了京都。他的生死传闻，可不像我们想象中那样只是小范围流传，而是…',
	'OST': 0,
	'_id': 2
	},
	{
	'picture': '画面切到王启年小心翼翼地向范闲汇报。',
	'timestamp': '00:04:41,001-00:04:58,001',
	'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
	'OST': 1,
	'_id': 3
	},
	{
	'picture': '【解说】"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。',
	'timestamp': '00:04:58,001-00:05:45,001',
	'narration': '"欺君之罪"！在封建王朝，这可是抄家灭族的大罪！搁一般人，肯定脚底抹油溜之大吉了。但范闲是谁啊？他偏要反其道而行之！他竟然决定，直接去见庆帝！冒着天大的风险，用"假死"这个事实去赌庆帝的态度！',
	'OST': 0,
	'_id': 4
	},
	{
	'picture': '【解说】但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
	'timestamp': '00:05:45,001-00:06:00,001',
	'narration': '但想见庆帝，哪有那么容易？范闲艺高人胆大，竟然选择了最激进的方式——闯宫！',
	'OST': 0,
	'_id': 5
	},
	{
	'picture': '画面切换到范闲蒙面闯入皇宫，被侍卫包围的场景。',
	'timestamp': '00:06:00,001-00:06:03,001',
	'narration': '抓刺客',
	'OST': 1,
	'_id': 6
	}]
	video_res = {
	1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
	2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
	4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
	5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]'}
	audio_res = {
	1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
	2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
	4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
	5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'}
	sub_res = {
	1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
	2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
	4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
	5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt'}

	# 更新并打印结果
	updated_list_script = update_script_timestamps(list_script, video_res, audio_res, sub_res)
	for item in updated_list_script:
	print(
	f"ID: {item['_id']} \| Picture: {item['picture'][:20]}... \| Timestamp: {item['timestamp']} \| " +
	f"SourceTimeRange: {item['sourceTimeRange']} \| EditedTimeRange: {item.get('editedTimeRange', '')} \| " +
	f"Duration: {item['duration']} 秒 \| Audio: {item['audio']} \| Video: {item['video']} \| Subtitle: {item['subtitle']}")