import base64
import json
import os
import random
import time
import urllib.parse
from io import BytesIO

import gradio
import gradio as gr
from google import genai
from google.genai import types
from PIL import Image


def _version_tuple(version):
    """Return the leading numeric components of a dotted version string.

    "4.44.0" -> (4, 44, 0). Parsing stops at the first component that is not
    purely numeric (after keeping its leading digits, if any), so suffixes
    such as "5.0.0b1" do not raise.
    """
    parts = []
    for piece in version.split("."):
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        parts.append(int(digits))
        if len(digits) < len(piece):
            break
    return tuple(parts)


# Check Gradio version. Versions are compared numerically: comparing the raw
# strings is lexicographic and would accept "4.5.0" as newer than "4.44.0".
required_version = "4.44.0"
current_version = gradio.__version__
if _version_tuple(current_version) < _version_tuple(required_version):
    raise ValueError(
        f"Gradio version {current_version} is outdated. Please upgrade to {required_version} or later "
        f"using 'pip install gradio=={required_version}'."
    )

# Initialize the Google Generative AI client with the API key from environment variables.
# An empty value is treated the same as a missing variable.
api_key = os.environ.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("Please set the GEMINI_API_KEY environment variable.")
client = genai.Client(api_key=api_key)

# Define safety settings to disable all filters for content generation.
_UNFILTERED_HARM_CATEGORIES = (
    types.HarmCategory.HARM_CATEGORY_HARASSMENT,
    types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
    types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    types.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
)
SAFETY_SETTINGS = [
    types.SafetySetting(
        category=category,
        threshold=types.HarmBlockThreshold.BLOCK_NONE,
    )
    for category in _UNFILTERED_HARM_CATEGORIES
]
def clean_response_text(response_text):
    """
    Clean the API response by removing Markdown code block markers.

    A leading fence is removed whether it is written as "```json" (any
    letter case), as a bare "```", or as "```<tag>" on a line of its own.
    A trailing "```" is removed as well, even when no leading fence exists.

    Args:
        response_text (str): The raw response text from the API. ``None`` or
            an empty string yields an empty string.

    Returns:
        str: The cleaned response text with surrounding whitespace stripped.
    """
    if not response_text:
        return ""

    fence = "```"
    cleaned_text = response_text.strip()

    if cleaned_text.startswith(fence):
        body = cleaned_text[len(fence):]
        first_line, newline, rest = body.partition("\n")
        if body[:4].lower() == "json":
            # "```json" may be glued directly to the payload, so strip only the tag.
            body = body[4:]
        elif newline and first_line.strip().isidentifier():
            # Some other language tag on the fence line: drop that whole line.
            body = rest
        cleaned_text = body.strip()

    if cleaned_text.endswith(fence):
        cleaned_text = cleaned_text[:-len(fence)].strip()

    return cleaned_text
def generate_ideas(user_input):
    """
    Generate a diverse set of ideas based on the user's input concept using the LLM.

    This is a generator: it yields progress updates for the loading UI and
    delivers the ideas as its return value (``StopIteration.value``, or the
    result of ``yield from``).

    Args:
        user_input (str): The user's input concept or idea.

    Yields:
        tuple: (progress_percentage, message) for progress_html updates.

    Returns:
        list: Five ideas as strings. If the API call fails or the response is
        not a JSON object holding exactly five non-empty strings under
        'ideas', five generic fallback ideas built from ``user_input`` are
        returned instead.
    """
    yield (10, f"Brainstorming epic ideas for {user_input}... 🌟")

    prompt = f"""
    The user has provided the concept: "{user_input}". You must generate 5 diverse and creative ideas for a TikTok video that are directly and explicitly related to "{user_input}".
    Each idea must clearly incorporate and focus on the core theme of "{user_input}" without deviating into unrelated topics.
    Each idea should be a short sentence describing a specific scene or concept.
    Return the response as a JSON object with a single key 'ideas' containing a list of 5 ideas.
    Ensure the response is strictly in JSON format.
    Example for "blindfolded Rubik's Cube challenge":
    {{"ideas": [
        "A blindfolded speedcubing competition with dramatic music",
        "A close-up of a person solving a Rubik's Cube blindfolded under a spotlight",
        "A time-lapse of a blindfolded Rubik's Cube solve with colorful lighting",
        "A blindfolded Rubik's Cube challenge in a futuristic setting",
        "A split-screen of two people racing to solve a Rubik's Cube blindfolded"
    ]}}
    """

    # Any failure (network, quota, malformed JSON, bad schema) deliberately
    # degrades to the generic fallback ideas rather than aborting the feed.
    try:
        response = client.models.generate_content(
            model='gemini-2.0-flash-lite',
            contents=[prompt],
            config=types.GenerateContentConfig(
                temperature=1.2,
                safety_settings=SAFETY_SETTINGS
            )
        )
        print(f"Raw response for ideas: {response.text}")  # Debugging
        if not response.text or response.text.isspace():
            raise ValueError("Empty response from API")

        response_json = json.loads(clean_response_text(response.text))
        ideas = response_json.get('ideas') if isinstance(response_json, dict) else None
        if not isinstance(ideas, list) or len(ideas) != 5:
            raise ValueError("Invalid JSON format: 'ideas' key missing, not a list, or incorrect length")
        # Ideas are later interpolated into prompts, so reject nulls, numbers and blanks.
        if not all(isinstance(idea, str) and idea.strip() for idea in ideas):
            raise ValueError("Invalid JSON format: every idea must be a non-empty string")
    except Exception as e:
        print(f"Error generating ideas: {e}")
        yield (20, f"Oops, tweaking the plan for {user_input}... 🔧")
        return [
            f"A dramatic {user_input} scene with cinematic lighting",
            f"A close-up of {user_input} in a futuristic setting",
            f"A high-energy {user_input} moment with vibrant colors",
            f"A serene {user_input} scene with soft focus",
            f"An action-packed {user_input} challenge with dynamic angles"
        ]

    yield (20, f"Ideas locked in for {user_input}! 🚀")
    return ideas
# Size of every feed image in pixels (9:16 aspect ratio).
_FEED_IMAGE_SIZE = (360, 640)
# Seconds to wait between polls of a pending video generation operation.
_VIDEO_POLL_SECONDS = 20


def _encode_png_base64(image):
    """Serialize a PIL image to PNG and return it as a base64 string."""
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode()


def _placeholder_image_base64():
    """Return a plain gray feed-sized PNG as base64, used when image generation fails."""
    return _encode_png_base64(Image.new('RGB', _FEED_IMAGE_SIZE, color='gray'))


def _build_item(text, image_base64, video_base64, ideas):
    """Assemble the feed item dictionary returned by generate_item."""
    return {
        'text': text,
        'image_base64': image_base64,
        'video_base64': video_base64,
        'ideas': ideas
    }


def _caption_and_image_prompt(user_input, selected_idea, attempt_label):
    """Ask the LLM for a caption and image prompt; fall back to generic ones on any failure."""
    prompt = f"""
    The user has provided the concept: "{user_input}". Based on this concept and the specific idea "{selected_idea}", create content for a TikTok video.
    Return a JSON object with two keys:
    - 'caption': A short, viral TikTok-style caption with hashtags that reflects "{user_input}".
    - 'image_prompt': A detailed image prompt for generating a high-quality visual scene, ensuring the theme of "{user_input}" is central.
    The image prompt should describe the scene vividly, specify a perspective and style, and ensure no text or letters are included.
    Ensure the response is strictly in JSON format.
    Example: {{"caption": "Blindfolded Rubik's Cube MAGIC! 🤯 #rubiks", "image_prompt": "A close-up view of a person solving a Rubik's Cube blindfolded, in a dramatic style, no text or letters"}}
    """
    try:
        response = client.models.generate_content(
            model='gemini-2.0-flash-lite',
            contents=[prompt],
            config=types.GenerateContentConfig(
                temperature=1.2,
                safety_settings=SAFETY_SETTINGS
            )
        )
        print(f"Raw response for item ({attempt_label}): {response.text}")  # Debugging
        if not response.text or response.text.isspace():
            raise ValueError("Empty response from API")
        response_json = json.loads(clean_response_text(response.text))
        if 'caption' not in response_json or 'image_prompt' not in response_json:
            raise ValueError("Invalid JSON format: 'caption' or 'image_prompt' key missing")
        return response_json['caption'], response_json['image_prompt']
    except Exception as e:
        # Best effort: a generic caption/prompt keeps the item usable.
        print(f"Error generating item ({attempt_label}): {e}")
        caption = f"Amazing {user_input}! 🔥 #{user_input.replace(' ', '')}"
        image_prompt = (
            f"A vivid scene of {selected_idea} related to {user_input}, "
            f"in a vibrant pop art style, no text or letters"
        )
        return caption, image_prompt


def _render_image(image_prompt):
    """Generate one image; return (generated_image, png_base64), or None if the API returned no image."""
    imagen = client.models.generate_images(
        model='imagen-3.0-generate-002',
        prompt=image_prompt,
        config=types.GenerateImagesConfig(
            aspect_ratio="9:16",
            number_of_images=1
        )
    )
    if not imagen.generated_images:
        return None
    generated = imagen.generated_images[0]
    image = Image.open(BytesIO(generated.image.image_bytes))
    # Force the feed size so every item has the same 9:16 dimensions.
    image = image.resize(_FEED_IMAGE_SIZE, Image.LANCZOS)
    return generated, _encode_png_base64(image)


def _generate_video_base64(video_prompt, image=None, timeout_seconds=600):
    """Run one video generation request and return the video as base64.

    When ``image`` is given the request is image-to-video, otherwise
    text-to-video. Raises ValueError when the operation reports an error or
    returns no usable video, and TimeoutError when it is still pending after
    ``timeout_seconds``.
    """
    request = {
        'model': "veo-2.0-generate-001",
        'prompt': video_prompt,
        'config': types.GenerateVideosConfig(
            aspect_ratio="9:16",
            number_of_videos=1,
            duration_seconds=8,
            negative_prompt="blurry, low quality, text, letters"
        ),
    }
    if image is not None:
        request['image'] = image
    operation = client.models.generate_videos(**request)

    # Poll until the operation finishes, but never wait forever on a stuck job.
    deadline = time.monotonic() + timeout_seconds
    while not operation.done:
        if time.monotonic() >= deadline:
            raise TimeoutError(f"Video generation did not finish within {timeout_seconds} seconds")
        time.sleep(_VIDEO_POLL_SECONDS)
        operation = client.operations.get(operation)

    # Log detailed information about the operation
    print(f"Video generation operation completed: {operation}")
    print(f"Operation error: {operation.error}")
    print(f"Operation response: {operation.response}")

    if operation.error:
        raise ValueError(f"Video generation operation failed with error: {operation.error.message}")
    if operation.response is None:
        raise ValueError("Video generation operation failed: No response")
    videos = getattr(operation.response, 'generated_videos', None)
    if videos is None:
        raise ValueError("Video generation operation failed: No generated_videos in response")
    if len(videos) == 0:
        raise ValueError("No video was generated")
    video = videos[0]
    if video is None or not hasattr(video, 'video'):
        raise ValueError("Video is invalid or missing video data")

    # The download may return raw bytes or an iterable of byte chunks.
    video_data = client.files.download(file=video.video)
    if isinstance(video_data, bytes):
        video_bytes = video_data
    else:
        video_bytes = b"".join(video_data)
    return base64.b64encode(video_bytes).decode()


def generate_item(user_input, ideas, generate_video=False, max_retries=3, *, video_timeout_seconds=600):
    """
    Generate a single feed item (image and optionally one video) using one of the ideas.

    This is a generator: it yields progress updates for the loading UI and
    delivers the finished item as its return value (``StopIteration.value``).

    Args:
        user_input (str): The user's input concept or idea.
        ideas (list): List of ideas to choose from. When empty, ``user_input``
            itself is used as the idea.
        generate_video (bool): Whether to generate a video from the image.
        max_retries (int): Maximum number of retries for image generation per cycle.
        video_timeout_seconds (int): Upper bound on how long a single video
            generation request is polled before it is treated as failed.

    Yields:
        tuple: (progress_percentage, message) for progress_html updates.

    Returns:
        dict: Dictionary with 'text', 'image_base64', 'video_base64', and 'ideas'.
        'video_base64' is None when video generation is disabled or failed;
        'image_base64' is a gray placeholder when no image could be generated.
    """
    max_total_attempts = 3  # Maximum number of image generation cycles
    total_attempts = 0

    while total_attempts < max_total_attempts:
        total_attempts += 1
        yield (20 + total_attempts * 10, f"Attempt {total_attempts} to craft your {user_input} masterpiece... 🎨")

        # Step 1: Generate an image (retry up to max_retries times)
        generated_image = None
        text = None
        img_str = None
        image_prompt = None

        for image_attempt in range(max_retries):
            yield (20 + total_attempts * 10 + image_attempt * 5, f"Crafting a stunning image for {user_input}... 📸")
            attempt_label = f"image attempt {image_attempt + 1}, total attempt {total_attempts}"
            selected_idea = random.choice(ideas) if ideas else user_input
            text, image_prompt = _caption_and_image_prompt(user_input, selected_idea, attempt_label)

            yield (40 + image_attempt * 5, f"Rendering your {user_input} vision... ✨")
            failure_message = None
            rendered = None
            try:
                rendered = _render_image(image_prompt)
            except Exception as e:
                print(f"Error generating image ({attempt_label}): {e}")
                failure_message = f"Retrying image for {user_input}... 🔄"
            else:
                if rendered is None:
                    print(f"Image generation failed ({attempt_label}): No images returned")
                    failure_message = f"Tweaking the image for {user_input}... 🔄"

            if failure_message is None:
                # Set both together so a failed decode never leaves a stale image behind.
                generated_image, img_str = rendered
                yield (50, f"Image for {user_input} is ready! 🎉")
                break

            if image_attempt == max_retries - 1:
                yield (50, failure_message)
                if total_attempts == max_total_attempts:
                    # Max total attempts reached, use a gray placeholder
                    placeholder = _placeholder_image_base64()
                    yield (60, f"Using a placeholder for {user_input}... 🖼️")
                    return _build_item(text, placeholder, None, ideas)

        # Step 2: Generate video if enabled (with fallback to text-to-video if image-to-video fails)
        video_base64 = None
        if generate_video and generated_image is not None:
            cinematic_prompt = f"""
            The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
            Use a close-up shot with a slow dolly shot circling around the subject,
            using shallow focus on the main subject to emphasize details, in a realistic style with cinematic lighting.
            """
            simplified_prompt = f"""
            The user concept is "{user_input}". Based on this and a simplified scene: {image_prompt}, create a video.
            Use a static close-up shot of the subject in a realistic style.
            """

            # First, try image-to-video generation (only once)
            try:
                yield (60, f"Filming a viral video for {user_input}... 🎥")
                print(f"Attempting image-to-video generation (total attempt {total_attempts}): {cinematic_prompt}")
                video_base64 = _generate_video_base64(
                    cinematic_prompt, generated_image.image, video_timeout_seconds
                )
            except Exception as e:
                print(f"Error generating video (image-to-video, total attempt {total_attempts}): {e}")
                yield (70, f"Switching to a new video approach for {user_input}... 🎞️")
                print("Image-to-video generation failed. Falling back to text-to-video generation.")

            # Then text-to-video: the same prompt first, a simplified prompt second.
            for video_attempt, video_prompt in enumerate((cinematic_prompt, simplified_prompt)):
                if video_base64 is not None:
                    break
                yield (75 + video_attempt * 5, f"Trying a fresh video take for {user_input}... 📹")
                print(f"Attempting text-to-video generation (video attempt {video_attempt + 1}, total attempt {total_attempts}): {video_prompt}")
                try:
                    video_base64 = _generate_video_base64(video_prompt, None, video_timeout_seconds)
                except Exception as e:
                    print(f"Error generating video (text-to-video attempt {video_attempt + 1}, total attempt {total_attempts}): {e}")

            if video_base64 is not None:
                yield (90, f"Video for {user_input} is a wrap! 🎬")
                return _build_item(text, img_str, video_base64, ideas)

            yield (85, f"Finalizing without video for {user_input}... 📌")
            # NOTE(review): the original logged that a new idea/image would be tried
            # after a video failure, but control always fell through to the
            # image-only return below. That behavior is kept; only the log is corrected.
            print("Video generation failed for this image. Proceeding without video.")

        # Video generation disabled or failed: return the image-only item
        if img_str is not None:
            yield (95, f"Polishing your {user_input} masterpiece... ✨")
            return _build_item(text, img_str, video_base64, ideas)
        # img_str is None: image generation failed this cycle, so try another cycle

    # Only reached when no image attempt ran at all (e.g. max_retries == 0)
    print("Max total attempts reached without successful image generation. Using placeholder.")
    yield (95, f"Falling back to a placeholder for {user_input}... 🖼️")
    placeholder = _placeholder_image_base64()
    yield (100, f"Ready to roll with {user_input}! 🚀")
    return _build_item(
        f"Amazing {user_input}! 🔥 #{user_input.replace(' ', '')}",
        placeholder,
        None,
        ideas,
    )
Error generating content. Please try again!
Error generating content. Please try again!
Download the media to share:
Click a share button below to start a post with the caption, then manually upload the downloaded image or video.
""" # Generate share links for social media platforms (only passing the caption) share_links = """ """ # Add YouTube Shorts share button if a video is available youtube_share = "" if video_base64: # Only show YouTube Shorts share button if a video is generated youtube_share = f""" """ return f"""Enter a concept or idea to start your feed!