Spaces:
Running
Running
File size: 12,761 Bytes
3b13b0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 |
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : update_script
@Author : 小林同学
@Date : 2025/5/6 下午11:00
'''
import re
import os
from typing import Dict, List, Any, Tuple, Union
def extract_timestamp_from_video_path(video_path: str) -> str:
    """
    Extract the time range encoded in a clipped video's file name.

    Only the base name of ``video_path`` is inspected. Two file-name layouts
    are recognised:

    * new layout, with milliseconds: ``vid_HH-MM-SS-mmm@HH-MM-SS-mmm.mp4``
    * old layout, whole seconds:     ``vid-HH-MM-SS-HH-MM-SS.mp4``

    Args:
        video_path: Path of the video file. ``None`` or an empty string is
            accepted and treated as "no timestamp available".

    Returns:
        ``'HH:MM:SS,mmm-HH:MM:SS,mmm'`` for the new layout,
        ``'HH:MM:SS-HH:MM:SS'`` for the old layout, or ``""`` when the file
        name matches neither layout (or no path was given).
    """
    # A missing path cannot carry a timestamp; os.path.basename(None) would
    # raise TypeError, so bail out early.
    if not video_path:
        return ""

    filename = os.path.basename(video_path)

    # New layout: vid_HH-MM-SS-mmm@HH-MM-SS-mmm.mp4
    match_new = re.search(
        r'vid_(\d{2})-(\d{2})-(\d{2})-(\d{3})@(\d{2})-(\d{2})-(\d{2})-(\d{3})\.mp4',
        filename,
    )
    if match_new:
        # Groups 1-4 are the start h/m/s/ms, groups 5-8 the end h/m/s/ms.
        start_h, start_m, start_s, start_ms, end_h, end_m, end_s, end_ms = match_new.groups()
        return f"{start_h}:{start_m}:{start_s},{start_ms}-{end_h}:{end_m}:{end_s},{end_ms}"

    # Old layout: vid-HH-MM-SS-HH-MM-SS.mp4
    match_old = re.search(r'vid-(\d{2}-\d{2}-\d{2})-(\d{2}-\d{2}-\d{2})\.mp4', filename)
    if match_old:
        # Each captured group is 'HH-MM-SS'; turn the dashes into colons.
        start_time = match_old.group(1).replace('-', ':')
        end_time = match_old.group(2).replace('-', ':')
        return f"{start_time}-{end_time}"

    return ""
def _time_point_to_seconds(time_point: str) -> float:
    """Convert one 'HH:MM:SS' or 'HH:MM:SS,sss' time point to seconds."""
    pieces = time_point.split(',')
    # The digits after the first comma are the fractional part of the second
    # ("250" -> 0.250); a time point without a comma has no fraction.
    fraction = float('0.' + pieces[1]) if len(pieces) > 1 else 0.0
    hours, minutes, seconds = map(int, pieces[0].split(':'))
    return hours * 3600 + minutes * 60 + seconds + fraction


def calculate_duration(timestamp: str) -> float:
    """
    Compute the length, in seconds, of a time range.

    Args:
        timestamp: A range written as 'HH:MM:SS-HH:MM:SS' or
            'HH:MM:SS,sss-HH:MM:SS,sss'. The two ends may use different
            forms (one with milliseconds, one without).

    Returns:
        ``end - start`` in seconds, rounded to two decimals and always a
        ``float``. The value is negative when the end precedes the start.
        ``0.0`` is returned when the input cannot be parsed (wrong shape,
        non-numeric fields, or not a string at all).
    """
    try:
        # Exactly one '-' is expected; anything else fails the unpacking
        # with ValueError and falls through to the 0.0 fallback.
        start_point, end_point = timestamp.split('-')
        start_seconds = _time_point_to_seconds(start_point)
        end_seconds = _time_point_to_seconds(end_point)
        return round(end_seconds - start_seconds, 2)
    except (ValueError, AttributeError, TypeError):
        # Best-effort contract: malformed or non-string input yields 0.0.
        return 0.0
def _lookup_by_id_or_timestamp(
    results: Dict[Union[str, int], Any],
    item_id: Any,
    timestamp: Any,
    default: Any = "",
) -> Any:
    """Return results[item_id] if present, else results[timestamp], else default."""
    # Compare against None rather than truthiness so that an _id of 0 is
    # still treated as a real identifier.
    if item_id is not None and item_id in results:
        return results[item_id]
    if timestamp in results:
        return results[timestamp]
    return default


def _seconds_to_hms(total_seconds: float) -> str:
    """Format a non-negative number of seconds as 'HH:MM:SS', dropping the fraction."""
    hours, remainder = divmod(int(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def update_script_timestamps(
    script_list: List[Dict[str, Any]],
    video_result: Dict[Union[str, int], str],
    audio_result: Union[Dict[Union[str, int], str], None] = None,
    subtitle_result: Union[Dict[Union[str, int], str], None] = None,
    calculate_edited_timerange: bool = True
) -> List[Dict[str, Any]]:
    """
    Build an updated copy of ``script_list`` enriched with clip information.

    Every returned entry is a shallow copy of the input entry with these keys
    added:

    * ``audio`` / ``subtitle`` / ``video``: the path found in the matching
      result dict, or ``""`` when there is none.
    * ``sourceTimeRange``: the time range extracted from the clip's file name
      when one can be extracted, otherwise the entry's original ``timestamp``.
      Omitted when neither is available.
    * ``duration``: length of ``sourceTimeRange`` in seconds (set together
      with ``sourceTimeRange``).
    * ``editedTimeRange``: 'HH:MM:SS-HH:MM:SS' position of the entry on the
      running total of the durations, truncated to whole seconds. Only set
      when ``calculate_edited_timerange`` is true and the duration is > 0.

    In each result dict an entry is looked up by its ``_id`` first and by its
    original ``timestamp`` second.

    Args:
        script_list: Original script entries; the input dicts are not modified.
        video_result: Maps an ``_id`` or original timestamp to a video path.
        audio_result: Maps an ``_id`` or original timestamp to an audio path.
        subtitle_result: Maps an ``_id`` or original timestamp to a subtitle path.
        calculate_edited_timerange: Whether to add ``editedTimeRange``.

    Returns:
        The list of updated entry copies, in the same order as ``script_list``.
    """
    video_result = video_result or {}

    # Map each video_result key to the time range encoded in its clip file
    # name; clips whose name carries no recognisable range are left out.
    new_timestamps = {}
    for key, video_path in video_result.items():
        new_timestamp = extract_timestamp_from_video_path(video_path)
        if new_timestamp:
            new_timestamps[key] = new_timestamp

    updated_script = []
    # Running total of clip durations, used to place each entry on the
    # timeline of the concatenated result.
    accumulated_duration = 0.0

    for item in script_list:
        # Work on a copy so the caller's dicts stay untouched.
        item_copy = item.copy()
        item_id = item_copy.get('_id')
        orig_timestamp = item_copy.get('timestamp', '')

        # Paths default to "" when no result dict is given or nothing matches.
        item_copy['audio'] = (
            _lookup_by_id_or_timestamp(audio_result, item_id, orig_timestamp)
            if audio_result else ""
        )
        item_copy['subtitle'] = (
            _lookup_by_id_or_timestamp(subtitle_result, item_id, orig_timestamp)
            if subtitle_result else ""
        )
        item_copy['video'] = _lookup_by_id_or_timestamp(video_result, item_id, orig_timestamp)

        # Prefer the range taken from the clip file name; otherwise keep the
        # original timestamp so that un-clipped entries still get a duration.
        source_range = _lookup_by_id_or_timestamp(
            new_timestamps, item_id, orig_timestamp, default=orig_timestamp
        )
        current_duration = 0.0
        if source_range:
            item_copy['sourceTimeRange'] = source_range
            current_duration = calculate_duration(source_range)
            item_copy['duration'] = current_duration

        if calculate_edited_timerange and current_duration > 0:
            start_time_seconds = accumulated_duration
            # Durations carry two decimals, so rounding the sum to two
            # decimals removes binary float drift (0.7 + 0.2 + 0.1 would
            # otherwise be 0.9999999999999999 and truncate to 0 seconds).
            end_time_seconds = round(accumulated_duration + current_duration, 2)
            item_copy['editedTimeRange'] = (
                f"{_seconds_to_hms(start_time_seconds)}-{_seconds_to_hms(end_time_seconds)}"
            )
            accumulated_duration = end_time_seconds

        updated_script.append(item_copy)

    return updated_script
if __name__ == '__main__':
    # Demo: run update_script_timestamps on a small hand-written script and
    # print one summary line per entry.
    list_script = [
        {
            'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!',
            'timestamp': '00:00:00,001-00:01:15,001',
            'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
            'OST': 0,
            '_id': 1
        },
        {
            'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
            'timestamp': '00:01:15,001-00:04:40,001',
            'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
            'OST': 0,
            '_id': 2
        },
        {
            'picture': '画面切到王启年小心翼翼地向范闲汇报。',
            'timestamp': '00:04:41,001-00:04:58,001',
            'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
            'OST': 1,
            '_id': 3
        },
        {
            'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
            'timestamp': '00:04:58,001-00:05:45,001',
            'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
            'OST': 0,
            '_id': 4
        },
        {
            'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'timestamp': '00:05:45,001-00:06:00,001',
            'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'OST': 0,
            '_id': 5
        },
        {
            'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。',
            'timestamp': '00:06:00,001-00:06:03,001',
            'narration': '抓刺客',
            'OST': 1,
            '_id': 6
        }]

    # Result dicts are keyed by '_id'; entries 3 and 6 (OST == 1) have no
    # entry in any of them.
    # NOTE(review): the clip file names below read '[email protected]', which
    # looks like an e-mail-obfuscation artifact rather than a real clip name.
    # They match neither pattern in extract_timestamp_from_video_path, so the
    # demo falls back to each entry's original timestamp - confirm the
    # intended file names.
    video_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]'}
    audio_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'}
    sub_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt'}

    # Update the script and print the result.
    updated_list_script = update_script_timestamps(list_script, video_res, audio_res, sub_res)
    for item in updated_list_script:
        # 'editedTimeRange' is read with .get() because it is only set for
        # entries whose duration is positive.
        summary_line = (
            f"ID: {item['_id']} | Picture: {item['picture'][:20]}... | Timestamp: {item['timestamp']} | "
            f"SourceTimeRange: {item['sourceTimeRange']} | EditedTimeRange: {item.get('editedTimeRange', '')} | "
            f"Duration: {item['duration']} 秒 | Audio: {item['audio']} | Video: {item['video']} | Subtitle: {item['subtitle']}"
        )
        print(summary_line)
|