File size: 13,905 Bytes
f098be9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
import json
import os
import re
from typing import Dict, List, Tuple, Union

import requests
from openai import OpenAI


def extract_clips_from_analysis(analysis_text: str) -> List[Dict]:
    """
    Extract social media clips from the initial analysis output

    Clip markers look like:
        [Introduction and Event Overview <div id='topic' style="display: inline"> 40s at 03:25 </div>]

    The timestamp may be ``MM:SS`` or ``H:MM:SS``; the hours field is optional
    and the minutes field may have any number of digits, so clips located past
    the one-hour mark are not silently dropped.

    Args:
        analysis_text: The formatted analysis text from get_initial_analysis.
            ``None`` or an empty string yields an empty list.

    Returns:
        List of clip dictionaries with clip_title, start_time, end_time and
        duration (all times in whole seconds)
    """
    # Treat a missing analysis as empty text instead of failing on len(None).
    analysis_text = analysis_text or ""
    print(f"Starting extract_clips_from_analysis with analysis_text length: {len(analysis_text)}")

    # Groups: title, duration in seconds, optional hours, minutes, seconds.
    pattern = re.compile(
        r"\[([^<]+)<div[^>]*>\s*(\d+)s\s+at\s+(?:(\d+):)?(\d+):(\d{2})\s*</div>\]"
    )

    matches = list(pattern.finditer(analysis_text))
    print(f"Found {len(matches)} matches in analysis text")

    clips = []
    for match in matches:
        raw_title, raw_duration, raw_hours, raw_minutes, raw_seconds = match.groups()

        title = raw_title.strip()
        duration = int(raw_duration)
        # raw_hours is None when the marker uses the plain MM:SS form.
        start_time = (
            int(raw_hours or 0) * 3600 + int(raw_minutes) * 60 + int(raw_seconds)
        )
        end_time = start_time + duration

        clips.append(
            {
                "clip_title": title,
                "start_time": start_time,
                "end_time": end_time,
                "duration": duration,
            }
        )
        print(f"Extracted clip: {title} ({start_time}-{end_time}s)")

    print(f"Total clips extracted: {len(clips)}")
    return clips


def extract_transcript_content(
    transcript_data: List, start_time: float, end_time: float
) -> str:
    """
    Collect the text of every transcript segment that overlaps a time window

    A segment is kept when its start is at or before ``end_time`` and its end
    is at or after ``start_time`` (both boundaries inclusive).

    Args:
        transcript_data: List of transcript segments. Each entry may expose
            ``start_time``/``end_time``/``text`` attributes, be a mapping with
            ``start_time``/``start``, ``end_time``/``end`` and ``text`` keys,
            or be any other object with ``start``/``end``/``text`` attributes
        start_time: Window start in seconds
        end_time: Window end in seconds

    Returns:
        The matching segment texts joined with single spaces and stripped
    """
    print(f"Extracting transcript content for {start_time}-{end_time}s from {len(transcript_data)} segments")

    def _read_fields(seg):
        # Objects carrying both *_time attributes are read directly.
        if hasattr(seg, "start_time") and hasattr(seg, "end_time"):
            return seg.start_time, seg.end_time, seg.text
        # Mapping-like segments: prefer the *_time keys, fall back to start/end.
        if hasattr(seg, "get"):
            return (
                seg.get("start_time", seg.get("start", 0)),
                seg.get("end_time", seg.get("end", 0)),
                seg.get("text", ""),
            )
        # Anything else: attribute lookup with the same fallbacks and defaults.
        return (
            getattr(seg, "start_time", getattr(seg, "start", 0)),
            getattr(seg, "end_time", getattr(seg, "end", 0)),
            getattr(seg, "text", ""),
        )

    pieces = []
    for seg in transcript_data:
        seg_start, seg_end, seg_text = _read_fields(seg)
        # Inclusive overlap test against the requested window.
        if seg_start <= end_time and seg_end >= start_time:
            pieces.append(seg_text)

    joined = " ".join(pieces).strip()
    print(f"Extracted {len(pieces)} segments, total text length: {len(joined)}")
    return joined


def _parse_broll_queries(response_text):
    """Turn the model's reply text into a list of query dicts.

    Accepts bare JSON, JSON inside a ```json or plain ``` fence, and JSON
    arrays surrounded by prose. Raises ``json.JSONDecodeError`` when no JSON
    payload can be recovered.
    """
    if not response_text:
        return []

    text = response_text.strip()

    # Prefer the contents of the first fenced block, with or without a "json" tag.
    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
    if fenced:
        text = fenced.group(1).strip()

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # The payload may be wrapped in prose: retry on the outermost [...] span.
        first, last = text.find("["), text.rfind("]")
        if first == -1 or last <= first:
            raise
        parsed = json.loads(text[first:last + 1])

    # A wrapper object such as {"queries": [...]} is unwrapped; a lone query
    # object is treated as a one-element list.
    if isinstance(parsed, dict):
        parsed = next(
            (value for value in parsed.values() if isinstance(value, list)),
            [parsed],
        )

    if not isinstance(parsed, list):
        return []

    # Callers use dict access on every entry, so drop anything else.
    return [entry for entry in parsed if isinstance(entry, dict)]


def generate_broll_queries(
    client: OpenAI, transcript_content: str, clip_data: Dict
) -> List[Dict]:
    """
    Generate B-roll search queries using OpenAI based on transcript content and clip data

    The model reply is parsed leniently (bare JSON, fenced JSON, or an array
    embedded in prose) and normalised so the result is always a list of
    dictionaries.

    Args:
        client: OpenAI client
        transcript_content: Transcript text for the clip timeframe
        clip_data: Social media clip data; ``clip_title``, ``start_time`` and
            ``end_time`` are read, each with a default when missing

    Returns:
        List of query dictionaries as returned by the model (the prompt asks
        for ``query``, ``timestamp_in_clip`` and ``relevance_reason`` keys).
        An empty list is returned when the request or the parsing fails; the
        error is printed rather than raised.
    """
    duration = clip_data.get("end_time", 0) - clip_data.get("start_time", 0)
    print(f"Generating B-roll queries for clip: {clip_data.get('clip_title', 'Unknown')}")

    prompt = f"""
    Analyze this transcript content from a social media clip and generate appropriate B-roll search queries.

    Clip Title: {clip_data.get('clip_title', 'Unknown')}
    Start Time: {clip_data.get('start_time', 0)} seconds
    End Time: {clip_data.get('end_time', 0)} seconds
    Duration: {duration} seconds

    Transcript Content:
    {transcript_content}

    Generate 3-5 specific search queries that would find relevant B-roll images for this content.
    For each query, specify the exact timestamp within the clip where it would be most relevant.

    Focus on:
    - Key people, places, or concepts mentioned
    - Visual metaphors or illustrations
    - Current events or topics discussed
    - Products, companies, or brands mentioned

    Return a JSON array with this structure:
    [
        {{
            "query": "specific search query for Google Images",
            "timestamp_in_clip": 5.2,
            "relevance_reason": "why this image is relevant at this moment"
        }}
    ]

    Ensure timestamps are between 0 and {duration} seconds.
    Make queries specific and descriptive for better image search results.
    """

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert video editor specializing in finding relevant B-roll content for social media clips. Generate specific, searchable queries that will find compelling visual content.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
        )

        queries = _parse_broll_queries(response.choices[0].message.content)

        print(f"Generated {len(queries)} B-roll queries")
        return queries

    except Exception as e:
        # Best-effort by design: one failed clip must not abort the whole run.
        print(f"Error generating B-roll queries: {str(e)}")
        return []


def search_google_images(
    query: str,
    api_key: str,
    search_engine_id: str,
    num_results: int = 3,
    timeout: float = 10.0,
) -> List[Dict]:
    """
    Search Google Images using Custom Search API

    Args:
        query: Search query string
        api_key: Google API key
        search_engine_id: Google Custom Search Engine ID
        num_results: Number of results to return; values above 10 are capped
            at 10, the most the API accepts per request
        timeout: Seconds to wait for the HTTP request before giving up

    Returns:
        List of image result dictionaries with title, image_url,
        thumbnail_url, context_url, width, height and file_size keys.
        An empty list is returned when the query or credentials are blank,
        when ``num_results`` is below 1, or when the request fails (the
        error is printed rather than raised).
    """
    # Nothing useful can come back for these inputs, so skip the network call.
    if not (query and query.strip()) or not api_key or not search_engine_id:
        return []
    if num_results < 1:
        return []

    try:
        url = "https://www.googleapis.com/customsearch/v1"
        params = {
            "key": api_key,
            "cx": search_engine_id,
            "q": query,
            "searchType": "image",
            # The API rejects "num" values outside 1..10.
            "num": min(num_results, 10),
            "safe": "active",
            "imgSize": "large",
            "imgType": "photo",
        }

        # Without a timeout, requests can block indefinitely on a stalled connection.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()

        data = response.json()
        results = []

        for item in data.get("items", []):
            image_meta = item.get("image", {})
            results.append(
                {
                    "title": item.get("title", ""),
                    "image_url": item.get("link", ""),
                    "thumbnail_url": image_meta.get("thumbnailLink", ""),
                    "context_url": image_meta.get("contextLink", ""),
                    "width": image_meta.get("width", 0),
                    "height": image_meta.get("height", 0),
                    "file_size": image_meta.get("byteSize", 0),
                }
            )

        return results

    except Exception as e:
        # Best-effort by design: a failed search yields no images, not a crash.
        print(f"Error searching Google Images for query '{query}': {str(e)}")
        return []


def _resolve_clip_timestamp(raw_timestamp, clip_duration):
    """Coerce a model-supplied clip offset to a number within [0, clip_duration]."""
    if isinstance(raw_timestamp, bool) or not isinstance(raw_timestamp, (int, float)):
        try:
            raw_timestamp = float(raw_timestamp)
        except (TypeError, ValueError):
            # Unusable value (None, free text, ...): anchor at the clip start.
            return 0
    return max(0, min(raw_timestamp, clip_duration))


def process_broll_generation(
    transcript_data: List,
    analysis_text: str,
    google_api_key: Union[str, None] = None,
    search_engine_id: Union[str, None] = None,
) -> List[Dict]:
    """
    Main processing function to generate B-roll content for social media clips

    For every clip found in ``analysis_text`` the overlapping transcript text
    is collected, search queries are requested from OpenAI, and, when both
    Google credentials are supplied, each query is run through Google image
    search.

    Args:
        transcript_data: Full transcript data from TranscriptProcessor (list of TranscriptSegment objects or dicts);
            ``None`` is treated as an empty transcript
        analysis_text: The formatted analysis output from get_initial_analysis;
            ``None`` is treated as empty text
        google_api_key: Google API key for image search
        search_engine_id: Google Custom Search Engine ID

    Returns:
        List of processed clips. Each entry holds the clip fields plus
        ``broll_suggestions``; clips with transcript text also carry
        ``transcript_content``, clips without it carry an ``error`` message.
        An empty list is returned when the analysis contains no clips.

    Raises:
        Exception: any unexpected error is printed and re-raised unchanged.
    """
    try:
        print("Starting B-roll generation process")
        print(f"Transcript data type: {type(transcript_data)}, length: {len(transcript_data) if transcript_data else 0}")
        print(f"Analysis text length: {len(analysis_text) if analysis_text else 0}")

        # The helpers call len() on their inputs, so normalise missing values.
        transcript_data = transcript_data or []

        # Extract clips from analysis text
        social_clips = extract_clips_from_analysis(analysis_text or "")

        if not social_clips:
            print("No clips found in analysis text")
            return []

        # Build the client only once there is work for it, so an analysis
        # without clips does not require OpenAI credentials.
        client = OpenAI()

        can_search_images = bool(google_api_key and search_engine_id)
        processed_clips = []

        for i, clip in enumerate(social_clips, 1):
            print(f"Processing clip {i}/{len(social_clips)}: {clip.get('clip_title', 'Unknown')}")
            start_time = clip.get("start_time", 0)
            end_time = clip.get("end_time", 0)

            # Extract relevant transcript content
            transcript_content = extract_transcript_content(
                transcript_data, start_time, end_time
            )

            if not transcript_content:
                print(f"No transcript content found for clip {start_time}-{end_time}")
                processed_clips.append(
                    {
                        **clip,
                        "broll_suggestions": [],
                        "error": "No transcript content found",
                    }
                )
                continue

            # Generate B-roll queries
            broll_queries = generate_broll_queries(client, transcript_content, clip)

            broll_suggestions = []

            for j, query_data in enumerate(broll_queries, 1):
                # The entries come from model output; skip anything that is
                # not a dictionary instead of failing on .get().
                if not isinstance(query_data, dict):
                    print(f"Skipping malformed query entry {j}/{len(broll_queries)}")
                    continue

                print(f"Processing query {j}/{len(broll_queries)}: {query_data.get('query', 'Unknown')}")
                query = query_data.get("query", "")
                reason = query_data.get("relevance_reason", "")
                # The prompt asks for offsets between 0 and the clip duration;
                # enforce that and survive strings or nulls from the model.
                timestamp = _resolve_clip_timestamp(
                    query_data.get("timestamp_in_clip", 0), end_time - start_time
                )

                if not query:
                    continue

                # Search Google Images if API is available
                images = []
                if can_search_images:
                    print(f"Searching Google Images for: {query}")
                    images = search_google_images(
                        query, google_api_key, search_engine_id
                    )
                    print(f"Found {len(images)} images")
                else:
                    print("Skipping Google Images search (no API credentials)")

                broll_suggestions.append(
                    {
                        "query": query,
                        "timestamp_in_clip": timestamp,
                        "absolute_timestamp": start_time + timestamp,
                        "relevance_reason": reason,
                        "images": images,
                    }
                )

            processed_clips.append(
                {
                    **clip,
                    "transcript_content": transcript_content,
                    "broll_suggestions": broll_suggestions,
                }
            )
            print(f"Completed processing clip {i}, found {len(broll_suggestions)} suggestions")

        print(f"B-roll generation complete. Processed {len(processed_clips)} clips")
        return processed_clips

    except Exception as e:
        print(f"Error in process_broll_generation: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise


def format_broll_output(processed_clips: List[Dict]) -> str:
    """
    Render processed clips and their B-roll suggestions as display text

    Each clip is listed with its number, title and MM:SS time range, followed
    by its queries, the offset of each query inside the clip, and up to two
    image links per query.

    Args:
        processed_clips: List of processed clips with B-roll suggestions

    Returns:
        Newline-joined text for the chat interface, or a fixed notice when
        the list is empty
    """
    if not processed_clips:
        return "No B-roll suggestions generated."

    def _clock(value):
        # Whole seconds rendered as zero-padded MM:SS.
        minutes, secs = divmod(int(value), 60)
        return f"{minutes:02d}:{secs:02d}"

    lines = ["🎬 B-Roll Suggestions\n"]

    for clip_no, clip in enumerate(processed_clips, 1):
        heading = clip.get("clip_title", "Unknown Clip")
        time_range = f"{_clock(clip.get('start_time', 0))} - {_clock(clip.get('end_time', 0))}"

        lines.append(f"\n{clip_no}. {heading}")
        lines.append(f"Time: {time_range}")

        suggestions = clip.get("broll_suggestions", [])

        if suggestions:
            for query_no, suggestion in enumerate(suggestions, 1):
                search_text = suggestion.get("query", "")
                offset = suggestion.get("timestamp_in_clip", 0)
                candidates = suggestion.get("images", [])

                lines += [
                    f"  Query {query_no}: {search_text}",
                    f"  At: {_clock(offset)}",
                ]

                if not candidates:
                    lines.append("    No images found for this query.")
                    continue

                # Only the first two images are shown; entries without a URL
                # are skipped but still count towards the link number.
                for link_no, image in enumerate(candidates[:2], 1):
                    link = image.get("image_url", "")
                    caption = image.get("title", "Image")
                    if link:
                        lines.append(f"    Link {link_no}: {caption[:50]} - {link}")
        else:
            lines.append("No B-roll suggestions available for this clip.")

        # Blank line separating this clip from the next.
        lines.append("")

    return "\n".join(lines)