# Source: Hugging Face Space "SameerArz/Hackathon" (status: Sleeping; file size: 8,817 bytes)

import ast
import json
import logging
import os
import tempfile
import threading
import wave

import gradio as gr
from groq import Groq
from moviepy.editor import TextClip, concatenate_videoclips, AudioFileClip, ColorClip

# Groq API client shared by the lesson and narration requests in this file.
# The key is read from the GROQ_API_KEY environment variable; indexing
# os.environ raises KeyError at import time when the variable is unset.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Text-to-image models offered by the model selector in the UI:
# model1 is chosen by "Model 1 (Turbo Realism)", model2 by "Model 2 (Face Projection)".
model1 = gr.load("models/prithivMLmods/SD3.5-Turbo-Realism-2.0-LoRA")
model2 = gr.load("models/Purz/face-projection")

# Cancellation flag for image generation: generate_images() clears it on entry
# and checks it before producing each image.
# NOTE(review): nothing visible in this file ever calls stop_event.set(), so the
# "stopped by user" path is currently unreachable - confirm whether a stop
# control was intended.
stop_event = threading.Event()

# Ask the chat model for a lesson, a comprehension question and feedback
def generate_tutor_output(subject, difficulty, student_input):
    """Request tutoring content from the Groq chat model.

    Args:
        subject: Subject name interpolated into the system and user prompts.
        difficulty: Proficiency level interpolated into both prompts.
        student_input: The student's query, quoted inside the user prompt.

    Returns:
        The raw message content of the first completion choice. The prompt
        asks for a JSON object with keys "lesson", "question" and "feedback",
        but the reply is returned unparsed and unvalidated.
    """
    user_prompt = f"""
    You are an expert tutor in {subject} at the {difficulty} level. 
    The student has provided the following input: "{student_input}"
    
    Please generate:
    1. A brief, engaging lesson on the topic (2-3 paragraphs)
    2. A thought-provoking question to check understanding
    3. Constructive feedback on the student's input
    
    Format your response as a JSON object with keys: "lesson", "question", "feedback"
    """

    system_message = {
        "role": "system",
        "content": f"You are the world's best AI tutor, renowned for your ability to explain complex concepts in an engaging, clear, and memorable way and giving math examples. Your expertise in {subject} is unparalleled, and you're adept at tailoring your teaching to {difficulty} level students.",
    }
    user_message = {"role": "user", "content": user_prompt}

    # NOTE(review): confirm that this model id is still served by the Groq API.
    response = client.chat.completions.create(
        messages=[system_message, user_message],
        model="mixtral-8x7b-32768",
        max_tokens=1000,
    )

    first_choice = response.choices[0]
    return first_choice.message.content

# Produce three prompt variations with the selected text-to-image model
def generate_images(text, selected_model):
    """Generate three image variations for ``text``.

    Args:
        text: Base prompt; " variation N" (N = 1..3) is appended per image.
        selected_model: UI label of the model to use, either
            "Model 1 (Turbo Realism)" or "Model 2 (Face Projection)".

    Returns:
        A list of three model results. When the label is not recognised, or
        ``stop_event`` is set before an image is generated, a list holding
        three copies of a status message is returned instead.
    """
    # Reset the shared cancellation flag at the start of every run.
    stop_event.clear()

    if selected_model not in ("Model 1 (Turbo Realism)", "Model 2 (Face Projection)"):
        return ["Invalid model selection."] * 3
    image_model = model1 if selected_model == "Model 1 (Turbo Realism)" else model2

    images = []
    for variation in range(1, 4):
        # Checked before each generation so a stop request discards the batch.
        if stop_event.is_set():
            return ["Image generation stopped by user."] * 3
        images.append(image_model(f"{text} variation {variation}"))

    return images

# Build a captioned video, with a placeholder silent audio track, from text
def generate_text_to_video(text, *, words_per_second=2.0, chunk_size=10):
    """Create a captioned MP4 from a model-written narration of ``text``.

    The chat model rewrites ``text`` as narration; no speech is synthesised.
    The narration is shown as white-on-black captions, ``chunk_size`` words at
    a time, and paired with a silent audio track of matching length that
    stands in for a future text-to-speech step.

    Args:
        text: Text to narrate. Empty or whitespace-only input is rejected.
        words_per_second: Assumed narration pace used to derive durations.
        chunk_size: Number of words displayed per caption.

    Returns:
        The path of the generated temporary ``.mp4`` file (the caller owns
        it), or a string starting with "Error generating video: " on failure.
        The function does not raise.
    """
    if not text or not text.strip():
        return "Error generating video: no text provided"

    audio_path = None
    video_path = None
    audio_clip = None
    try:
        # Ask the chat model to rewrite the text as narration.
        narration_prompt = f"Convert this text to a natural-sounding narration: {text}"
        # NOTE(review): confirm that this model id is still served by the Groq API.
        narration_response = client.chat.completions.create(
            messages=[{
                "role": "system",
                "content": "You are an AI voice generator that produces natural, human-like speech."
            }, {
                "role": "user",
                "content": narration_prompt,
            }],
            model="mixtral-8x7b-32768",
            max_tokens=500,
        )
        narration_text = narration_response.choices[0].message.content

        words = (narration_text or "").split()
        if not words:
            raise ValueError("the model returned an empty narration")
        audio_duration = len(words) / words_per_second

        # Placeholder for real TTS: write a silent 16-bit mono WAV with the
        # stdlib. The previous code called write_audiofile() on a ColorClip,
        # which is a video clip, so it failed on every call.
        sample_rate = 44100
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            audio_path = temp_audio.name
        with wave.open(audio_path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(bytes(2 * int(audio_duration * sample_rate)))

        # One caption clip per chunk. Each clip lasts in proportion to its own
        # word count, so the clip durations add up to audio_duration even when
        # the final chunk is shorter than chunk_size.
        clips = []
        for start in range(0, len(words), chunk_size):
            chunk_words = words[start:start + chunk_size]
            clip = TextClip(" ".join(chunk_words), fontsize=50, color='white', size=(1280, 720), bg_color='black')
            clips.append(clip.set_duration(len(chunk_words) / words_per_second))

        final_video = concatenate_videoclips(clips)

        audio_clip = AudioFileClip(audio_path)
        final_video = final_video.set_audio(audio_clip)

        # Reserve a temp path, then let moviepy write to it after the handle
        # is closed.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
            video_path = temp_video.name
        final_video.write_videofile(video_path, fps=24, logger=None)

        return video_path
    except Exception as e:
        # Do not leave a partial video behind when rendering fails.
        if video_path is not None and os.path.exists(video_path):
            os.unlink(video_path)
        return f"Error generating video: {str(e)}"
    finally:
        # Close the audio reader before deleting the file it reads from.
        if audio_clip is not None:
            audio_clip.close()
        if audio_path is not None and os.path.exists(audio_path):
            os.unlink(audio_path)

# Set up the Gradio interface: a text section (lesson request form and its
# three Markdown outputs), a visual section (model selector, image and video
# outputs) and a short usage guide. Components are created in display order.
with gr.Blocks() as demo:
    gr.Markdown("# 🎓 Your AI Tutor with Visuals & Images")

    # Section for generating Text-based output (lesson, question, feedback)
    with gr.Row():
        # Input column: subject, difficulty and the free-text query.
        with gr.Column(scale=2):
            subject = gr.Dropdown(
                ["Math", "Science", "History", "Literature", "Code", "AI"], 
                label="Subject", 
                info="Choose the subject of your lesson"
            )
            difficulty = gr.Radio(
                ["Beginner", "Intermediate", "Advanced"], 
                label="Difficulty Level", 
                info="Select your proficiency level"
            )
            # This textbox is also the prompt source for the image and video
            # buttons wired up later in this block.
            student_input = gr.Textbox(
                placeholder="Type your query here...", 
                label="Your Input", 
                info="Enter the topic you want to learn"
            )
            submit_button_text = gr.Button("Generate Lesson & Question", variant="primary")
        
        # Output column: one Markdown area per part of the tutor reply.
        with gr.Column(scale=3):
            lesson_output = gr.Markdown(label="Lesson")
            question_output = gr.Markdown(label="Comprehension Question")
            feedback_output = gr.Markdown(label="Feedback")
    
    # Section for generating Visual output
    with gr.Row():
        # Control column: model choice plus the image and video triggers.
        with gr.Column(scale=2):
            # The choice labels must match the strings compared in generate_images().
            model_selector = gr.Radio(
                ["Model 1 (Turbo Realism)", "Model 2 (Face Projection)"],
                label="Select Image Generation Model",
                value="Model 1 (Turbo Realism)"
            )
            submit_button_visual = gr.Button("Generate Visuals", variant="primary")
            submit_button_video = gr.Button("Generate Video with Voice", variant="primary")  # Triggers video generation
        
        # Output column: three image slots and one video player.
        with gr.Column(scale=3):
            output1 = gr.Image(label="Generated Image 1")
            output2 = gr.Image(label="Generated Image 2")
            output3 = gr.Image(label="Generated Image 3")
            video_output = gr.Video(label="Generated Video with Voice")  # Receives the generated video
    
    # Static usage instructions rendered below both sections.
    gr.Markdown("""
    ### How to Use
    1. **Text Section**: Select a subject and difficulty, type your query, and click 'Generate Lesson & Question' to get your personalized lesson, comprehension question, and feedback.
    2. **Visual Section**: Select the model for image generation, then click 'Generate Visuals' to receive 3 variations of an image based on your topic. Click 'Generate Video with Voice' to create a video with narration.
    3. Review the AI-generated content to enhance your learning experience!
    """)
    
    def process_output_text(subject, difficulty, student_input):
        """Return the (lesson, question, feedback) strings for the text section.

        Calls generate_tutor_output() and parses its reply as data. The reply
        is never executed: it is model output shaped by user input, so it is
        decoded with json.loads(), with ast.literal_eval() as a fallback for
        Python-literal style replies (for example single-quoted keys).

        Args:
            subject: Selected subject.
            difficulty: Selected difficulty level.
            student_input: The student's query.

        Returns:
            A 3-tuple (lesson, question, feedback). On any failure (request
            error, unparsable reply, missing key) the fixed fallback messages
            are returned and the error is logged; the handler does not raise.
        """
        try:
            tutor_output = generate_tutor_output(subject, difficulty, student_input)

            # Models often wrap the object in prose or ``` fences, so keep
            # only the outermost {...} span.
            start = tutor_output.find("{")
            end = tutor_output.rfind("}")
            if start == -1 or end < start:
                raise ValueError("no JSON object found in the model reply")
            payload = tutor_output[start:end + 1]

            try:
                # strict=False tolerates raw newlines inside string values,
                # which multi-paragraph lessons frequently contain.
                parsed = json.loads(payload, strict=False)
            except json.JSONDecodeError:
                parsed = ast.literal_eval(payload)

            return parsed["lesson"], parsed["question"], parsed["feedback"]
        except Exception:
            # UI boundary: log the cause and show the fallback text.
            logging.exception("Could not produce tutor output")
            return "Error parsing output", "No question available", "No feedback available"
    
    def process_output_visual(text, selected_model):
        """Return three images for the visual section's image outputs.

        Args:
            text: Prompt text taken from the student input box.
            selected_model: Label chosen in the model selector.

        Returns:
            A 3-tuple of results from generate_images(), or (None, None, None)
            when generation fails or only a status message was returned. The
            handler does not raise; failures are logged.
        """
        # Status strings generate_images() returns in place of images. They
        # are messages rather than image data, so they are not forwarded to
        # the image outputs.
        status_messages = ("Invalid model selection.", "Image generation stopped by user.")
        try:
            images = generate_images(text, selected_model)
            if any(isinstance(image, str) and image in status_messages for image in images):
                logging.warning("Image generation produced no images: %s", images[0])
                return None, None, None
            return images[0], images[1], images[2]
        except Exception:
            # UI boundary: log the cause and leave the image outputs empty.
            logging.exception("Image generation failed")
            return None, None, None
    
    def process_output_video(text):
        """Return the generated video's path for the video output, or None.

        generate_text_to_video() reports failures by returning an
        "Error generating video: ..." string instead of a path, so the result
        is only forwarded when it names an existing file.

        Args:
            text: Prompt text taken from the student input box.

        Returns:
            The path of the generated video file, or None when generation
            failed. The handler does not raise; failures are logged.
        """
        try:
            video_path = generate_text_to_video(text)
        except Exception:
            # UI boundary: log the cause and leave the video output empty.
            logging.exception("Video generation failed")
            return None

        if not isinstance(video_path, str) or not os.path.isfile(video_path):
            logging.error("Video generation failed: %s", video_path)
            return None
        return video_path

    # Generate Text-based Output: the three form fields go in, and the
    # returned 3-tuple fills the lesson, question and feedback areas in order.
    submit_button_text.click(
        fn=process_output_text,
        inputs=[subject, difficulty, student_input],
        outputs=[lesson_output, question_output, feedback_output]
    )
    
    # Generate Visual Output: the student's query doubles as the image prompt,
    # and the returned 3-tuple fills the three image slots in order.
    submit_button_visual.click(
        fn=process_output_visual,
        inputs=[student_input, model_selector],
        outputs=[output1, output2, output3]
    )
    
    # Generate Video Output: the student's query is also the narration source.
    submit_button_video.click(
        fn=process_output_video,
        inputs=[student_input],
        outputs=[video_output]
    )

# Start the Gradio server only when this file is executed as a script.
# The host ("0.0.0.0") and port (7860) are fixed explicitly here.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)