import json
import os
import re
from typing import Dict, List, Optional

import requests
from openai import OpenAI
def extract_clips_from_analysis(analysis_text: str) -> List[Dict]:
    """
    Extract social media clips from the initial analysis output.

    Args:
        analysis_text: The formatted analysis text from get_initial_analysis

    Returns:
        List of clip dictionaries with title, start_time, and end_time
    """
    print(f"Starting extract_clips_from_analysis with analysis_text length: {len(analysis_text)}")
    clips = []

    # Pattern to match clip links with timestamps
    # Example: [Introduction and Event Overview <div id='topic' style="display: inline"> 40s at 03:25 </div>]
    pattern = r"\[([^<]+)<div[^>]*>\s*(\d+)s\s+at\s+(\d{2}):(\d{2})\s*</div>\]"
    matches = re.findall(pattern, analysis_text)
    print(f"Found {len(matches)} matches in analysis text")

    for match in matches:
        title = match[0].strip()
        duration = int(match[1])
        minutes = int(match[2])
        seconds = int(match[3])

        start_time = minutes * 60 + seconds
        end_time = start_time + duration

        clip = {
            "clip_title": title,
            "start_time": start_time,
            "end_time": end_time,
            "duration": duration,
        }
        clips.append(clip)
        print(f"Extracted clip: {title} ({start_time}-{end_time}s)")

    print(f"Total clips extracted: {len(clips)}")
    return clips
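
# Illustrative example (hypothetical analysis text, shaped like the pattern above):
#   extract_clips_from_analysis(
#       "[Intro <div id='topic' style=\"display: inline\"> 40s at 03:25 </div>]"
#   )
# would return [{"clip_title": "Intro", "start_time": 205, "end_time": 245, "duration": 40}].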


def extract_transcript_content(
    transcript_data: List, start_time: float, end_time: float
) -> str:
    """
    Extract transcript content between start and end times.

    Args:
        transcript_data: List of transcript segments (TranscriptSegment objects or dicts)
        start_time: Start time in seconds
        end_time: End time in seconds

    Returns:
        Extracted transcript text
    """
    print(f"Extracting transcript content for {start_time}-{end_time}s from {len(transcript_data)} segments")
    content = []

    for segment in transcript_data:
        # Handle both TranscriptSegment objects and dictionary formats
        if hasattr(segment, "start_time") and hasattr(segment, "end_time"):
            # TranscriptSegment object
            segment_start = segment.start_time
            segment_end = segment.end_time
            segment_text = segment.text
        elif hasattr(segment, "get"):
            # Dictionary format
            segment_start = segment.get("start_time", segment.get("start", 0))
            segment_end = segment.get("end_time", segment.get("end", 0))
            segment_text = segment.get("text", "")
        else:
            # Handle other object types with direct attribute access
            segment_start = getattr(segment, "start_time", getattr(segment, "start", 0))
            segment_end = getattr(segment, "end_time", getattr(segment, "end", 0))
            segment_text = getattr(segment, "text", "")

        # Check if segment overlaps with our time range
        if segment_start <= end_time and segment_end >= start_time:
            content.append(segment_text)

    result = " ".join(content).strip()
    print(f"Extracted {len(content)} segments, total text length: {len(result)}")
    return result
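
# Quick sanity check (hypothetical dict-format segments): both segments below
# overlap the 3-8s window, so the call returns "hello world".
#   extract_transcript_content(
#       [{"start_time": 0, "end_time": 5, "text": "hello"},
#        {"start_time": 5, "end_time": 10, "text": "world"}],
#       3, 8,
#   )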


def generate_broll_queries(
    client: OpenAI, transcript_content: str, clip_data: Dict
) -> List[Dict]:
    """
    Generate B-roll search queries using OpenAI based on transcript content and clip data.

    Args:
        client: OpenAI client
        transcript_content: Transcript text for the clip timeframe
        clip_data: Social media clip data with timestamps

    Returns:
        List of query dictionaries with timestamps
    """
    duration = clip_data.get("end_time", 0) - clip_data.get("start_time", 0)
    print(f"Generating B-roll queries for clip: {clip_data.get('clip_title', 'Unknown')}")

    prompt = f"""
Analyze this transcript content from a social media clip and generate appropriate B-roll search queries.

Clip Title: {clip_data.get('clip_title', 'Unknown')}
Start Time: {clip_data.get('start_time', 0)} seconds
End Time: {clip_data.get('end_time', 0)} seconds
Duration: {duration} seconds

Transcript Content:
{transcript_content}

Generate 3-5 specific search queries that would find relevant B-roll images for this content.
For each query, specify the exact timestamp within the clip where it would be most relevant.

Focus on:
- Key people, places, or concepts mentioned
- Visual metaphors or illustrations
- Current events or topics discussed
- Products, companies, or brands mentioned

Return a JSON array with this structure:
[
    {{
        "query": "specific search query for Google Images",
        "timestamp_in_clip": 5.2,
        "relevance_reason": "why this image is relevant at this moment"
    }}
]

Ensure timestamps are between 0 and {duration} seconds.
Make queries specific and descriptive for better image search results.
"""

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert video editor specializing in finding relevant B-roll content for social media clips. Generate specific, searchable queries that will find compelling visual content.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
        )

        response_text = response.choices[0].message.content

        # Extract JSON from the response, stripping a markdown code fence if present
        if "```json" in response_text:
            json_text = response_text.split("```json", 1)[1].split("```", 1)[0]
            queries = json.loads(json_text)
        else:
            queries = json.loads(response_text)

        print(f"Generated {len(queries)} B-roll queries")
        return queries
    except Exception as e:
        print(f"Error generating B-roll queries: {str(e)}")
        return []
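
# Illustrative call (hypothetical clip dict; requires a configured OpenAI client,
# i.e. OPENAI_API_KEY set in the environment):
#   generate_broll_queries(
#       OpenAI(),
#       "Today we toured the new robotics lab...",
#       {"clip_title": "Lab Tour", "start_time": 0, "end_time": 30},
#   )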


def search_google_images(
    query: str, api_key: str, search_engine_id: str, num_results: int = 3
) -> List[Dict]:
    """
    Search Google Images using the Custom Search API.

    Args:
        query: Search query string
        api_key: Google API key
        search_engine_id: Google Custom Search Engine ID
        num_results: Number of results to return

    Returns:
        List of image result dictionaries
    """
    try:
        url = "https://www.googleapis.com/customsearch/v1"
        params = {
            "key": api_key,
            "cx": search_engine_id,
            "q": query,
            "searchType": "image",
            "num": num_results,
            "safe": "active",
            "imgSize": "large",
            "imgType": "photo",
        }

        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        results = []
        for item in data.get("items", []):
            image_info = item.get("image", {})
            result = {
                "title": item.get("title", ""),
                "image_url": item.get("link", ""),
                "thumbnail_url": image_info.get("thumbnailLink", ""),
                "context_url": image_info.get("contextLink", ""),
                "width": image_info.get("width", 0),
                "height": image_info.get("height", 0),
                "file_size": image_info.get("byteSize", 0),
            }
            results.append(result)
        return results
    except Exception as e:
        print(f"Error searching Google Images for query '{query}': {str(e)}")
        return []
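
# Usage sketch (hypothetical credentials; the env var names are this example's
# convention, not required by the module):
#   images = search_google_images(
#       "san francisco skyline at dusk",
#       api_key=os.environ["GOOGLE_API_KEY"],
#       search_engine_id=os.environ["SEARCH_ENGINE_ID"],
#   )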


def process_broll_generation(
    transcript_data: List,
    analysis_text: str,
    google_api_key: Optional[str] = None,
    search_engine_id: Optional[str] = None,
) -> List[Dict]:
    """
    Main processing function to generate B-roll content for social media clips.

    Args:
        transcript_data: Full transcript data from TranscriptProcessor (list of TranscriptSegment objects or dicts)
        analysis_text: The formatted analysis output from get_initial_analysis
        google_api_key: Google API key for image search
        search_engine_id: Google Custom Search Engine ID

    Returns:
        List of processed clips with B-roll suggestions
    """
    try:
        print("Starting B-roll generation process")
        print(f"Transcript data type: {type(transcript_data)}, length: {len(transcript_data) if transcript_data else 0}")
        print(f"Analysis text length: {len(analysis_text) if analysis_text else 0}")

        # Initialize OpenAI client
        client = OpenAI()

        # Extract clips from analysis text
        social_clips = extract_clips_from_analysis(analysis_text)
        if not social_clips:
            print("No clips found in analysis text")
            return []

        processed_clips = []
        for i, clip in enumerate(social_clips, 1):
            print(f"Processing clip {i}/{len(social_clips)}: {clip.get('clip_title', 'Unknown')}")
            start_time = clip.get("start_time", 0)
            end_time = clip.get("end_time", 0)

            # Extract relevant transcript content
            transcript_content = extract_transcript_content(
                transcript_data, start_time, end_time
            )
            if not transcript_content:
                print(f"No transcript content found for clip {start_time}-{end_time}")
                processed_clips.append(
                    {
                        **clip,
                        "broll_suggestions": [],
                        "error": "No transcript content found",
                    }
                )
                continue

            # Generate B-roll queries
            broll_queries = generate_broll_queries(client, transcript_content, clip)

            broll_suggestions = []
            for j, query_data in enumerate(broll_queries, 1):
                print(f"Processing query {j}/{len(broll_queries)}: {query_data.get('query', 'Unknown')}")
                query = query_data.get("query", "")
                timestamp = query_data.get("timestamp_in_clip", 0)
                reason = query_data.get("relevance_reason", "")

                if not query:
                    continue

                # Search Google Images if API credentials are available
                images = []
                if google_api_key and search_engine_id:
                    print(f"Searching Google Images for: {query}")
                    images = search_google_images(
                        query, google_api_key, search_engine_id
                    )
                    print(f"Found {len(images)} images")
                else:
                    print("Skipping Google Images search (no API credentials)")

                broll_suggestion = {
                    "query": query,
                    "timestamp_in_clip": timestamp,
                    "absolute_timestamp": start_time + timestamp,
                    "relevance_reason": reason,
                    "images": images,
                }
                broll_suggestions.append(broll_suggestion)

            processed_clip = {
                **clip,
                "transcript_content": transcript_content,
                "broll_suggestions": broll_suggestions,
            }
            processed_clips.append(processed_clip)
            print(f"Completed processing clip {i}, found {len(broll_suggestions)} suggestions")

        print(f"B-roll generation complete. Processed {len(processed_clips)} clips")
        return processed_clips
    except Exception as e:
        print(f"Error in process_broll_generation: {str(e)}")
        raise


def format_broll_output(processed_clips: List[Dict]) -> str:
    """
    Format B-roll suggestions for display in the chat interface.

    Args:
        processed_clips: List of processed clips with B-roll suggestions

    Returns:
        Formatted string for display
    """
    if not processed_clips:
        return "No B-roll suggestions generated."

    output = ["🎬 B-Roll Suggestions\n"]
    for i, clip in enumerate(processed_clips, 1):
        title = clip.get("clip_title", "Unknown Clip")
        start_time = clip.get("start_time", 0)
        end_time = clip.get("end_time", 0)

        # Format time display as MM:SS
        start_min, start_sec = divmod(int(start_time), 60)
        end_min, end_sec = divmod(int(end_time), 60)

        output.append(f"\n{i}. {title}")
        output.append(f"Time: {start_min:02d}:{start_sec:02d} - {end_min:02d}:{end_sec:02d}")

        broll_suggestions = clip.get("broll_suggestions", [])
        if not broll_suggestions:
            output.append("No B-roll suggestions available for this clip.")
        else:
            for j, suggestion in enumerate(broll_suggestions, 1):
                query = suggestion.get("query", "")
                timestamp = suggestion.get("timestamp_in_clip", 0)
                images = suggestion.get("images", [])

                # Format timestamp within clip
                ts_min, ts_sec = divmod(int(timestamp), 60)
                output.append(f" Query {j}: {query}")
                output.append(f" At: {ts_min:02d}:{ts_sec:02d}")

                # Show top 2 image links only
                if images:
                    top_images = images[:2]
                    for k, img in enumerate(top_images, 1):
                        img_url = img.get("image_url", "")
                        img_title = img.get("title", "Image")
                        if img_url:
                            output.append(f" Link {k}: {img_title[:50]} - {img_url}")
                else:
                    output.append(" No images found for this query.")

        output.append("")

    return "\n".join(output)