File size: 12,761 Bytes
3b13b0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

'''
@Project: NarratoAI
@File   : update_script
@Author : 小林同学
@Date   : 2025/5/6 下午11:00 
'''

import re
import os
from typing import Dict, List, Any, Tuple, Union


def extract_timestamp_from_video_path(video_path: str) -> str:
    """
    从视频文件路径中提取时间戳
    
    Args:
        video_path: 视频文件路径
    
    Returns:
        提取出的时间戳,格式为 'HH:MM:SS-HH:MM:SS' 或 'HH:MM:SS,sss-HH:MM:SS,sss'
    """
    # 使用正则表达式从文件名中提取时间戳
    filename = os.path.basename(video_path)
    
    # 匹配新格式: [email protected]
    match_new = re.search(r'vid_(\d{2})-(\d{2})-(\d{2})-(\d{3})@(\d{2})-(\d{2})-(\d{2})-(\d{3})\.mp4', filename)
    if match_new:
        # 提取并格式化时间戳(包含毫秒)
        start_h, start_m, start_s, start_ms = match_new.group(1), match_new.group(2), match_new.group(3), match_new.group(4)
        end_h, end_m, end_s, end_ms = match_new.group(5), match_new.group(6), match_new.group(7), match_new.group(8)
        return f"{start_h}:{start_m}:{start_s},{start_ms}-{end_h}:{end_m}:{end_s},{end_ms}"
    
    # 匹配旧格式: vid-00-00-00-00-00-00.mp4
    match_old = re.search(r'vid-(\d{2}-\d{2}-\d{2})-(\d{2}-\d{2}-\d{2})\.mp4', filename)
    if match_old:
        # 提取并格式化时间戳
        start_time = match_old.group(1).replace('-', ':')
        end_time = match_old.group(2).replace('-', ':')
        return f"{start_time}-{end_time}"

    return ""


def calculate_duration(timestamp: str) -> float:
    """
    计算时间戳范围的持续时间(秒)
    
    Args:
        timestamp: 格式为 'HH:MM:SS-HH:MM:SS' 或 'HH:MM:SS,sss-HH:MM:SS,sss' 的时间戳
    
    Returns:
        持续时间(秒)
    """
    try:
        start_time, end_time = timestamp.split('-')

        # 处理毫秒部分
        if ',' in start_time:
            start_parts = start_time.split(',')
            start_time_parts = start_parts[0].split(':')
            start_ms = float('0.' + start_parts[1]) if len(start_parts) > 1 else 0
            start_h, start_m, start_s = map(int, start_time_parts)
        else:
            start_h, start_m, start_s = map(int, start_time.split(':'))
            start_ms = 0

        if ',' in end_time:
            end_parts = end_time.split(',')
            end_time_parts = end_parts[0].split(':')
            end_ms = float('0.' + end_parts[1]) if len(end_parts) > 1 else 0
            end_h, end_m, end_s = map(int, end_time_parts)
        else:
            end_h, end_m, end_s = map(int, end_time.split(':'))
            end_ms = 0

        # 转换为秒
        start_seconds = start_h * 3600 + start_m * 60 + start_s + start_ms
        end_seconds = end_h * 3600 + end_m * 60 + end_s + end_ms

        # 计算时间差(秒)
        return round(end_seconds - start_seconds, 2)
    except (ValueError, AttributeError):
        return 0.0


def update_script_timestamps(
    script_list: List[Dict[str, Any]], 
    video_result: Dict[Union[str, int], str], 
    audio_result: Dict[Union[str, int], str] = None,
    subtitle_result: Dict[Union[str, int], str] = None,
    calculate_edited_timerange: bool = True
) -> List[Dict[str, Any]]:
    """
    根据 video_result 中的视频文件更新 script_list 中的时间戳,添加持续时间,
    并根据 audio_result 添加音频路径,根据 subtitle_result 添加字幕路径
    
    Args:
        script_list: 原始脚本列表
        video_result: 视频结果字典,键为原时间戳或_id,值为视频文件路径
        audio_result: 音频结果字典,键为原时间戳或_id,值为音频文件路径
        subtitle_result: 字幕结果字典,键为原时间戳或_id,值为字幕文件路径
        calculate_edited_timerange: 是否计算并添加成品视频中的时间范围
    
    Returns:
        更新后的脚本列表
    """
    # 创建副本,避免修改原始数据
    updated_script = []

    # 建立ID和时间戳到视频路径和新时间戳的映射
    id_timestamp_mapping = {}
    for key, video_path in video_result.items():
        new_timestamp = extract_timestamp_from_video_path(video_path)
        if new_timestamp:
            id_timestamp_mapping[key] = {
                'new_timestamp': new_timestamp,
                'video_path': video_path
            }

    # 计算累积时长,用于生成成品视频中的时间范围
    accumulated_duration = 0.0
    
    # 更新脚本中的时间戳
    for item in script_list:
        item_copy = item.copy()
        item_id = item_copy.get('_id')
        orig_timestamp = item_copy.get('timestamp', '')

        # 初始化音频和字幕路径为空字符串
        item_copy['audio'] = ""
        item_copy['subtitle'] = ""
        item_copy['video'] = ""  # 初始化视频路径为空字符串

        # 如果提供了音频结果字典且ID存在于音频结果中,直接使用对应的音频路径
        if audio_result:
            if item_id and item_id in audio_result:
                item_copy['audio'] = audio_result[item_id]
            elif orig_timestamp in audio_result:
                item_copy['audio'] = audio_result[orig_timestamp]

        # 如果提供了字幕结果字典且ID存在于字幕结果中,直接使用对应的字幕路径
        if subtitle_result:
            if item_id and item_id in subtitle_result:
                item_copy['subtitle'] = subtitle_result[item_id]
            elif orig_timestamp in subtitle_result:
                item_copy['subtitle'] = subtitle_result[orig_timestamp]

        # 添加视频路径
        if item_id and item_id in video_result:
            item_copy['video'] = video_result[item_id]
        elif orig_timestamp in video_result:
            item_copy['video'] = video_result[orig_timestamp]

        # 更新时间戳和计算持续时间
        current_duration = 0.0
        if item_id and item_id in id_timestamp_mapping:
            # 根据ID找到对应的新时间戳
            item_copy['sourceTimeRange'] = id_timestamp_mapping[item_id]['new_timestamp']
            current_duration = calculate_duration(item_copy['sourceTimeRange'])
            item_copy['duration'] = current_duration
        elif orig_timestamp in id_timestamp_mapping:
            # 根据原始时间戳找到对应的新时间戳
            item_copy['sourceTimeRange'] = id_timestamp_mapping[orig_timestamp]['new_timestamp']
            current_duration = calculate_duration(item_copy['sourceTimeRange'])
            item_copy['duration'] = current_duration
        elif orig_timestamp:
            # 对于未更新的时间戳,也计算并添加持续时间
            item_copy['sourceTimeRange'] = orig_timestamp
            current_duration = calculate_duration(orig_timestamp)
            item_copy['duration'] = current_duration
            
        # 计算片段在成品视频中的时间范围
        if calculate_edited_timerange and current_duration > 0:
            start_time_seconds = accumulated_duration
            end_time_seconds = accumulated_duration + current_duration
            
            # 将秒数转换为 HH:MM:SS 格式
            start_h = int(start_time_seconds // 3600)
            start_m = int((start_time_seconds % 3600) // 60)
            start_s = int(start_time_seconds % 60)
            
            end_h = int(end_time_seconds // 3600)
            end_m = int((end_time_seconds % 3600) // 60)
            end_s = int(end_time_seconds % 60)
            
            item_copy['editedTimeRange'] = f"{start_h:02d}:{start_m:02d}:{start_s:02d}-{end_h:02d}:{end_m:02d}:{end_s:02d}"
            
            # 更新累积时长
            accumulated_duration = end_time_seconds

        updated_script.append(item_copy)

    return updated_script


if __name__ == '__main__':
    list_script = [
        {
            'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!',
            'timestamp': '00:00:00,001-00:01:15,001',
            'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
            'OST': 0,
            '_id': 1
        },
        {
            'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
            'timestamp': '00:01:15,001-00:04:40,001',
            'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
            'OST': 0,
            '_id': 2
        },
        {
            'picture': '画面切到王启年小心翼翼地向范闲汇报。',
            'timestamp': '00:04:41,001-00:04:58,001',
            'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
            'OST': 1,
            '_id': 3
        },
        {
            'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
            'timestamp': '00:04:58,001-00:05:45,001',
            'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
            'OST': 0,
            '_id': 4
        },
        {
            'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'timestamp': '00:05:45,001-00:06:00,001',
            'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'OST': 0,
            '_id': 5
        },
        {
            'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。',
            'timestamp': '00:06:00,001-00:06:03,001',
            'narration': '抓刺客',
            'OST': 1,
            '_id': 6
        }]
    video_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/[email protected]'}
    audio_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'}
    sub_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt'}
    
    # 更新并打印结果
    updated_list_script = update_script_timestamps(list_script, video_res, audio_res, sub_res)
    for item in updated_list_script:
        print(
            f"ID: {item['_id']} | Picture: {item['picture'][:20]}... | Timestamp: {item['timestamp']} | " +
            f"SourceTimeRange: {item['sourceTimeRange']} | EditedTimeRange: {item.get('editedTimeRange', '')} | " +
            f"Duration: {item['duration']} 秒 | Audio: {item['audio']} | Video: {item['video']} | Subtitle: {item['subtitle']}")