File size: 8,739 Bytes
3b13b0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

'''
@Project: NarratoAI
@File   : subtitle_merger
@Author : viccy
@Date   : 2025/5/6 下午4:00 
'''

import re
import os
from datetime import datetime, timedelta


def parse_time(time_str):
    """Parse an SRT timestamp such as ``00:01:02,345`` into a ``timedelta``.

    Surrounding whitespace is ignored, and a ``.`` millisecond separator is
    accepted in addition to the SRT-standard ``,`` so that timestamps written
    by tools that emit ``HH:MM:SS.mmm`` still parse.

    Args:
        time_str: Timestamp text in ``HH:MM:SS,mmm`` (or ``HH:MM:SS.mmm``) form.

    Returns:
        A ``timedelta`` built from the hour, minute, second and millisecond
        fields.

    Raises:
        ValueError: If the text does not split into exactly three
            ``:``-separated fields plus a millisecond part, or if any field
            is not an integer.
    """
    # Normalise the millisecond separator so both "01,500" and "01.500" work.
    normalized = time_str.strip().replace('.', ',')
    hours, minutes, seconds_ms = normalized.split(':')
    seconds, milliseconds = seconds_ms.split(',')

    return timedelta(
        hours=int(hours),
        minutes=int(minutes),
        seconds=int(seconds),
        milliseconds=int(milliseconds),
    )


def format_time(td):
    """Render a ``timedelta`` as an SRT timestamp string ``HH:MM:SS,mmm``.

    The hour field is not capped at 24; it keeps growing for longer
    durations. Anything below one millisecond is truncated, not rounded.
    """
    whole_seconds = int(td.total_seconds())
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    # Sub-second part comes from the microseconds field, floored to ms.
    millis = td.microseconds // 1000

    return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, millis)


def parse_edited_time_range(time_range_str):
    """Split an ``editedTimeRange`` string into start and end offsets.

    The expected form is ``HH:MM:SS-HH:MM:SS``. Each half may optionally
    carry milliseconds as ``HH:MM:SS,mmm`` or ``HH:MM:SS.mmm``.

    Args:
        time_range_str: The range text; may be empty or ``None``.

    Returns:
        A ``(start, end)`` tuple of ``timedelta`` objects, or ``(None, None)``
        when the input is empty or malformed in any way (wrong number of
        ``-`` separated halves, wrong number of ``:`` separated fields, or a
        non-numeric field). Malformed input never raises ``ValueError``, so
        callers only need to test for ``None``.
    """
    if not time_range_str:
        return None, None

    parts = time_range_str.split('-')
    if len(parts) != 2:
        return None, None

    bounds = []
    for part in parts:
        try:
            hours, minutes, seconds = part.strip().split(':')
            # Optional fractional part: "SS,mmm" or "SS.mmm".
            seconds, _, millis = seconds.replace('.', ',').partition(',')
            bounds.append(timedelta(
                hours=int(hours),
                minutes=int(minutes),
                seconds=int(seconds),
                milliseconds=int(millis) if millis else 0,
            ))
        except ValueError:
            # Wrong field count or a non-integer field: report "no range"
            # the same way as the other malformed-input paths above.
            return None, None

    start_time, end_time = bounds
    return start_time, end_time


def merge_subtitle_files(subtitle_items, output_file=None):
    """Merge several SRT files into one, shifted onto the edited timeline.

    Items are processed in order of the start of their ``editedTimeRange``.
    Every cue of an item's SRT file is shifted by that start offset, and the
    cues are renumbered consecutively from 1 across all files.

    An item is skipped when its ``subtitle`` value is missing/empty or the
    file does not exist; it is skipped with a printed warning when its
    ``editedTimeRange`` cannot be parsed. Cue blocks with fewer than three
    lines, or whose second line is not ``start --> end``, are ignored.

    Args:
        subtitle_items: Iterable of dicts. The keys read here are
            ``subtitle`` (path to an SRT file), ``editedTimeRange``
            (``HH:MM:SS-HH:MM:SS``) and ``_id`` (used only in the warning).
        output_file: Destination path. When ``None`` the file is written next
            to the first item that has a subtitle path, named
            ``merged_subtitle_<first start>-<last end>.srt``.

    Returns:
        The path of the merged subtitle file that was written.

    Raises:
        ValueError: If ``output_file`` is ``None`` and no item provides a
            subtitle path and a parseable ``editedTimeRange`` to build the
            default path from; also propagated from ``parse_time`` for a
            malformed cue timestamp.
    """
    # Parse every editedTimeRange once; the result is reused for sorting,
    # for the per-file offset and for the default output name.
    entries = [
        (item, *parse_edited_time_range(item.get('editedTimeRange', '')))
        for item in subtitle_items
    ]
    # Items without a usable range sort as offset zero; the sort is stable,
    # so ties keep their input order.
    entries.sort(key=lambda entry: entry[1] or timedelta())

    merged_subtitles = []
    subtitle_index = 1

    for item, offset_time, _ in entries:
        subtitle_path = item.get('subtitle')
        if not subtitle_path or not os.path.exists(subtitle_path):
            continue

        if offset_time is None:
            print(f"警告: 无法从项目 {item.get('_id')} 的editedTimeRange中提取时间范围,跳过该项")
            continue

        with open(subtitle_path, 'r', encoding='utf-8') as file:
            content = file.read()

        # Cue blocks are separated by one or more blank lines.
        for block in re.split(r'\n\s*\n', content.strip()):
            lines = block.strip().split('\n')
            if len(lines) < 3:  # need index line, timing line and some text
                continue

            time_parts = lines[1].split(' --> ')
            if len(time_parts) != 2:
                continue

            # Shift both ends of the cue onto the edited timeline.
            adjusted_start_time = parse_time(time_parts[0]) + offset_time
            adjusted_end_time = parse_time(time_parts[1]) + offset_time
            adjusted_time_line = (
                f"{format_time(adjusted_start_time)} --> {format_time(adjusted_end_time)}"
            )

            # The original cue number is dropped in favour of the running index.
            merged_subtitles.append(
                '\n'.join([str(subtitle_index), adjusted_time_line, *lines[2:]])
            )
            subtitle_index += 1

    if output_file is None:
        output_file = _default_merged_subtitle_path(entries)

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write('\n\n'.join(merged_subtitles))

    return output_file


def _default_merged_subtitle_path(entries):
    """Build the default output path from sorted ``(item, start, end)`` entries."""
    subtitle_paths = [
        item.get('subtitle') for item, _, _ in entries if item.get('subtitle')
    ]
    starts = [start for _, start, _ in entries if start is not None]
    ends = [end for _, _, end in entries if end is not None]

    if not (subtitle_paths and starts and ends):
        raise ValueError(
            "cannot derive a default output path: no item provides both a "
            "subtitle path and a parseable editedTimeRange; pass output_file"
        )

    # Entries are sorted by start, so starts[0] is the earliest start and
    # ends[-1] is the end of the latest-starting item.
    first_start_str = _hms_filename_part(starts[0])
    last_end_str = _hms_filename_part(ends[-1])
    dir_path = os.path.dirname(subtitle_paths[0])
    return os.path.join(
        dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt"
    )


def _hms_filename_part(td):
    """Format a ``timedelta`` as ``HH_MM_SS`` without wrapping the hours at 24."""
    hours, remainder = divmod(int(td.total_seconds()), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}_{minutes:02d}_{seconds:02d}"


if __name__ == '__main__':
    # Sample items for a manual run; the audio/subtitle paths point at one
    # developer's local task directory.
    sample_items = [
        {
            'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!',
            'timestamp': '00:00:00-00:01:15',
            'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
            'OST': 0,
            '_id': 1,
            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
            'sourceTimeRange': '00:00:00-00:00:26',
            'duration': 26,
            'editedTimeRange': '00:00:00-00:00:26',
        },
        {
            'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
            'timestamp': '00:01:15-00:04:40',
            'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
            'OST': 0,
            '_id': 2,
            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
            'sourceTimeRange': '00:01:15-00:01:29',
            'duration': 14,
            'editedTimeRange': '00:00:26-00:00:40',
        },
        {
            'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
            'timestamp': '00:04:58-00:05:45',
            'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
            'OST': 0,
            '_id': 4,
            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
            'sourceTimeRange': '00:04:58-00:05:20',
            'duration': 22,
            'editedTimeRange': '00:00:57-00:01:19',
        },
        {
            'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'timestamp': '00:05:45-00:06:00',
            'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'OST': 0,
            '_id': 5,
            'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
            'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
            'sourceTimeRange': '00:05:45-00:05:53',
            'duration': 8,
            'editedTimeRange': '00:01:19-00:01:27',
        },
    ]

    # Merge with an auto-generated output path and report where it went.
    output_file = merge_subtitle_files(sample_items)
    print(f"字幕文件已合并至: {output_file}")