import json
import os
import re
from typing import Dict, List

import requests
from openai import OpenAI


def extract_clips_from_analysis(analysis_text: str) -> List[Dict]:
    """
    Extract social media clips from the initial analysis output

    Args:
        analysis_text: The formatted analysis text from get_initial_analysis

    Returns:
        List of clip dictionaries with title, start_time, and end_time
    """
    print(f"Starting extract_clips_from_analysis with analysis_text length: {len(analysis_text)}")
    clips = []

    # Pattern to match clip links with timestamps, where an HTML tag
    # (e.g. <br>) separates the title from the duration.
    # Example: [Introduction and Event Overview <br> 40s at 03:25]
    pattern = r"\[([^<]+)<[^>]*>\s*(\d+)s\s+at\s+(\d{2}):(\d{2})\s*\]"
    matches = re.findall(pattern, analysis_text)
    print(f"Found {len(matches)} matches in analysis text")

    for match in matches:
        title = match[0].strip()
        duration = int(match[1])
        minutes = int(match[2])
        seconds = int(match[3])

        start_time = minutes * 60 + seconds
        end_time = start_time + duration

        clip = {
            "clip_title": title,
            "start_time": start_time,
            "end_time": end_time,
            "duration": duration,
        }
        clips.append(clip)
        print(f"Extracted clip: {title} ({start_time}-{end_time}s)")

    print(f"Total clips extracted: {len(clips)}")
    return clips


def extract_transcript_content(
    transcript_data: List, start_time: float, end_time: float
) -> str:
    """
    Extract transcript content between start and end times

    Args:
        transcript_data: List of transcript segments (TranscriptSegment objects or dicts)
        start_time: Start time in seconds
        end_time: End time in seconds

    Returns:
        Extracted transcript text
    """
    print(f"Extracting transcript content for {start_time}-{end_time}s from {len(transcript_data)} segments")
    content = []

    for segment in transcript_data:
        # Handle both TranscriptSegment objects and dictionary formats
        if hasattr(segment, "start_time") and hasattr(segment, "end_time"):
            # TranscriptSegment object
            segment_start = segment.start_time
            segment_end = segment.end_time
            segment_text = segment.text
        elif hasattr(segment, "get"):
            # Dictionary format
            segment_start = segment.get("start_time", segment.get("start", 0))
            segment_end = segment.get("end_time", segment.get("end", 0))
            segment_text = segment.get("text", "")
        else:
            # Handle other object types with direct attribute access
            segment_start = getattr(segment, "start_time", getattr(segment, "start", 0))
            segment_end = getattr(segment, "end_time", getattr(segment, "end", 0))
            segment_text = getattr(segment, "text", "")

        # Check if segment overlaps with our time range
        if segment_start <= end_time and segment_end >= start_time:
            content.append(segment_text)

    result = " ".join(content).strip()
    print(f"Extracted {len(content)} segments, total text length: {len(result)}")
    return result
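
# A minimal sketch of how the two helpers above compose. The analysis line and
# transcript segment below are illustrative placeholders, not real pipeline
# output:
#
#   clips = extract_clips_from_analysis("[Event Overview <br> 40s at 03:25]")
#   # -> [{"clip_title": "Event Overview", "start_time": 205,
#   #      "end_time": 245, "duration": 40}]
#   text = extract_transcript_content(
#       [{"start": 200.0, "end": 250.0, "text": "Welcome to the event."}],
#       clips[0]["start_time"],
#       clips[0]["end_time"],
#   )
#   # -> "Welcome to the event."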

def generate_broll_queries(
    client: OpenAI, transcript_content: str, clip_data: Dict
) -> List[Dict]:
    """
    Generate B-roll search queries using OpenAI based on transcript content and clip data

    Args:
        client: OpenAI client
        transcript_content: Transcript text for the clip timeframe
        clip_data: Social media clip data with timestamps

    Returns:
        List of query dictionaries with timestamps
    """
    duration = clip_data.get("end_time", 0) - clip_data.get("start_time", 0)
    print(f"Generating B-roll queries for clip: {clip_data.get('clip_title', 'Unknown')}")

    prompt = f"""
Analyze this transcript content from a social media clip and generate appropriate B-roll search queries.

Clip Title: {clip_data.get('clip_title', 'Unknown')}
Start Time: {clip_data.get('start_time', 0)} seconds
End Time: {clip_data.get('end_time', 0)} seconds
Duration: {duration} seconds

Transcript Content:
{transcript_content}

Generate 3-5 specific search queries that would find relevant B-roll images for this content.
For each query, specify the exact timestamp within the clip where it would be most relevant.

Focus on:
- Key people, places, or concepts mentioned
- Visual metaphors or illustrations
- Current events or topics discussed
- Products, companies, or brands mentioned

Return a JSON array with this structure:
[
    {{
        "query": "specific search query for Google Images",
        "timestamp_in_clip": 5.2,
        "relevance_reason": "why this image is relevant at this moment"
    }}
]

Ensure timestamps are between 0 and {duration} seconds.
Make queries specific and descriptive for better image search results.
"""

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert video editor specializing in finding relevant B-roll content for social media clips. Generate specific, searchable queries that will find compelling visual content.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
        )

        response_text = response.choices[0].message.content

        # Extract JSON from a fenced ```json block if present; otherwise
        # parse the whole response as JSON.
        if "```json" in response_text and "```" in response_text.split("```json", 1)[1]:
            json_text = response_text.split("```json", 1)[1].split("```", 1)[0]
            queries = json.loads(json_text)
        else:
            queries = json.loads(response_text)

        print(f"Generated {len(queries)} B-roll queries")
        return queries

    except Exception as e:
        print(f"Error generating B-roll queries: {str(e)}")
        return []


def search_google_images(
    query: str, api_key: str, search_engine_id: str, num_results: int = 3
) -> List[Dict]:
    """
    Search Google Images using the Custom Search API

    Args:
        query: Search query string
        api_key: Google API key
        search_engine_id: Google Custom Search Engine ID
        num_results: Number of results to return (the API accepts at most 10)

    Returns:
        List of image result dictionaries
    """
    try:
        url = "https://www.googleapis.com/customsearch/v1"
        params = {
            "key": api_key,
            "cx": search_engine_id,
            "q": query,
            "searchType": "image",
            "num": num_results,
            "safe": "active",
            "imgSize": "large",
            "imgType": "photo",
        }

        # Timeout added so a stalled connection cannot hang the pipeline
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()

        data = response.json()
        results = []

        for item in data.get("items", []):
            result = {
                "title": item.get("title", ""),
                "image_url": item.get("link", ""),
                "thumbnail_url": item.get("image", {}).get("thumbnailLink", ""),
                "context_url": item.get("image", {}).get("contextLink", ""),
                "width": item.get("image", {}).get("width", 0),
                "height": item.get("image", {}).get("height", 0),
                "file_size": item.get("image", {}).get("byteSize", 0),
            }
            results.append(result)

        return results

    except Exception as e:
        print(f"Error searching Google Images for query '{query}': {str(e)}")
        return []
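
# Call-site sketch for search_google_images. The Custom Search JSON API caps
# "num" at 10 per request and needs a Programmable Search Engine with image
# search enabled; the environment variable names here are an assumption, not
# something this module defines:
#
#   images = search_google_images(
#       "conference keynote stage wide shot",
#       api_key=os.environ["GOOGLE_API_KEY"],
#       search_engine_id=os.environ["GOOGLE_CSE_ID"],
#       num_results=3,
#   )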

def process_broll_generation(
    transcript_data: List,
    analysis_text: str,
    google_api_key: str = None,
    search_engine_id: str = None,
) -> List[Dict]:
    """
    Main processing function to generate B-roll content for social media clips

    Args:
        transcript_data: Full transcript data from TranscriptProcessor
            (list of TranscriptSegment objects or dicts)
        analysis_text: The formatted analysis output from get_initial_analysis
        google_api_key: Google API key for image search
        search_engine_id: Google Custom Search Engine ID

    Returns:
        List of processed clips with B-roll suggestions
    """
    try:
        print("Starting B-roll generation process")
        print(f"Transcript data type: {type(transcript_data)}, length: {len(transcript_data) if transcript_data else 0}")
        print(f"Analysis text length: {len(analysis_text) if analysis_text else 0}")

        # Initialize OpenAI client
        client = OpenAI()

        # Extract clips from analysis text
        social_clips = extract_clips_from_analysis(analysis_text)
        if not social_clips:
            print("No clips found in analysis text")
            return []

        processed_clips = []

        for i, clip in enumerate(social_clips, 1):
            print(f"Processing clip {i}/{len(social_clips)}: {clip.get('clip_title', 'Unknown')}")
            start_time = clip.get("start_time", 0)
            end_time = clip.get("end_time", 0)

            # Extract relevant transcript content
            transcript_content = extract_transcript_content(
                transcript_data, start_time, end_time
            )

            if not transcript_content:
                print(f"No transcript content found for clip {start_time}-{end_time}")
                processed_clips.append(
                    {
                        **clip,
                        "broll_suggestions": [],
                        "error": "No transcript content found",
                    }
                )
                continue

            # Generate B-roll queries
            broll_queries = generate_broll_queries(client, transcript_content, clip)

            broll_suggestions = []
            for j, query_data in enumerate(broll_queries, 1):
                print(f"Processing query {j}/{len(broll_queries)}: {query_data.get('query', 'Unknown')}")
                query = query_data.get("query", "")
                timestamp = query_data.get("timestamp_in_clip", 0)
                reason = query_data.get("relevance_reason", "")

                if not query:
                    continue

                # Search Google Images if API credentials are available
                images = []
                if google_api_key and search_engine_id:
                    print(f"Searching Google Images for: {query}")
                    images = search_google_images(
                        query, google_api_key, search_engine_id
                    )
                    print(f"Found {len(images)} images")
                else:
                    print("Skipping Google Images search (no API credentials)")

                broll_suggestion = {
                    "query": query,
                    "timestamp_in_clip": timestamp,
                    "absolute_timestamp": start_time + timestamp,
                    "relevance_reason": reason,
                    "images": images,
                }
                broll_suggestions.append(broll_suggestion)

            processed_clip = {
                **clip,
                "transcript_content": transcript_content,
                "broll_suggestions": broll_suggestions,
            }
            processed_clips.append(processed_clip)
            print(f"Completed processing clip {i}, found {len(broll_suggestions)} suggestions")

        print(f"B-roll generation complete. Processed {len(processed_clips)} clips")
        return processed_clips

    except Exception as e:
        print(f"Error in process_broll_generation: {str(e)}")
        raise


def format_broll_output(processed_clips: List[Dict]) -> str:
    """
    Format B-roll suggestions for display in the chat interface

    Args:
        processed_clips: List of processed clips with B-roll suggestions

    Returns:
        Formatted string for display
    """
    if not processed_clips:
        return "No B-roll suggestions generated."

    output = ["🎬 B-Roll Suggestions\n"]

    for i, clip in enumerate(processed_clips, 1):
        title = clip.get("clip_title", "Unknown Clip")
        start_time = clip.get("start_time", 0)
        end_time = clip.get("end_time", 0)

        # Format time display
        start_min, start_sec = divmod(int(start_time), 60)
        end_min, end_sec = divmod(int(end_time), 60)

        output.append(f"\n{i}. {title}")
        output.append(f"Time: {start_min:02d}:{start_sec:02d} - {end_min:02d}:{end_sec:02d}")

        broll_suggestions = clip.get("broll_suggestions", [])
        if not broll_suggestions:
            output.append("No B-roll suggestions available for this clip.")
        else:
            for j, suggestion in enumerate(broll_suggestions, 1):
                query = suggestion.get("query", "")
                timestamp = suggestion.get("timestamp_in_clip", 0)
                images = suggestion.get("images", [])

                # Format timestamp within clip
                ts_min, ts_sec = divmod(int(timestamp), 60)
                output.append(f"  Query {j}: {query}")
                output.append(f"  At: {ts_min:02d}:{ts_sec:02d}")

                # Show top 2 image links only
                if images:
                    top_images = images[:2]
                    for k, img in enumerate(top_images, 1):
                        img_url = img.get("image_url", "")
                        img_title = img.get("title", "Image")
                        if img_url:
                            output.append(f"    Link {k}: {img_title[:50]} - {img_url}")
                else:
                    output.append("    No images found for this query.")

        output.append("")

    return "\n".join(output)
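
if __name__ == "__main__":
    # Minimal smoke test with synthetic inputs: a sketch, not part of the
    # pipeline. process_broll_generation constructs an OpenAI client, so
    # OPENAI_API_KEY must be set; Google credentials are omitted here, which
    # makes the image-search step a no-op.
    if not os.environ.get("OPENAI_API_KEY"):
        raise SystemExit("Set OPENAI_API_KEY to run this demo.")

    sample_analysis = "[Event Overview <br> 40s at 03:25]"
    sample_transcript = [
        {"start_time": 200.0, "end_time": 250.0, "text": "Welcome to the event."}
    ]
    clips = process_broll_generation(sample_transcript, sample_analysis)
    print(format_broll_output(clips))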