# Source: Hugging Face Space iisadia/Movie_Buff_QA (status: Sleeping)
# File size: 6,762 Bytes

import streamlit as st
import pandas as pd
import faiss
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from groq import Groq
import os

# --------------------------
# Configuration & Styling
# --------------------------
# Browser-tab title/icon and overall layout for the app, gathered in one
# mapping so the settings can be read at a glance.
_PAGE_SETTINGS = {
    "page_title": "CineMaster AI - Movie Expert",
    "page_icon": "🎬",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Stylesheet injected into the page. It defines the two theme colours and the
# classes used by the HTML snippets rendered elsewhere in this file
# (.header, .response-box, .movie-card) plus the button styling.
_CUSTOM_CSS = """
<style>
    :root {
        --primary: #7017ff;
        --secondary: #ff2d55;
    }
    .header {
        background: linear-gradient(135deg, var(--primary), var(--secondary));
        color: white;
        padding: 2rem;
        border-radius: 15px;
        text-align: center;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        margin-bottom: 2rem;
    }
    .response-box {
        background: rgba(255,255,255,0.1);
        border-radius: 10px;
        padding: 1.5rem;
        margin: 1rem 0;
        border: 1px solid rgba(255,255,255,0.2);
    }
    .stButton>button {
        background: linear-gradient(45deg, var(--primary), var(--secondary)) !important;
        color: white !important;
        border-radius: 25px;
        padding: 0.8rem 2rem;
        font-weight: 600;
        transition: transform 0.2s;
    }
    .stButton>button:hover {
        transform: scale(1.05);
    }
    .movie-card {
        background: rgba(0,0,0,0.2);
        border-radius: 10px;
        padding: 1rem;
        margin: 0.5rem 0;
    }
</style>
"""
# unsafe_allow_html is required for the raw <style> tag to reach the page.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# --------------------------
# Data Loading & Processing
# --------------------------
def _fallback_movies():
    """Return the small hand-written movie table used when the dataset is unavailable."""
    return pd.DataFrame([
        {
            "title": "The Dark Knight",
            "context": "Title: The Dark Knight\nPlot: Batman faces the Joker in a battle for Gotham's soul...\nCast: Christian Bale, Heath Ledger\nYear: 2008\nDirector: Christopher Nolan"
        },
        {
            "title": "Inception",
            "context": "Title: Inception\nPlot: A thief who enters the dreams of others...\nCast: Leonardo DiCaprio, Tom Hardy\nYear: 2010\nDirector: Christopher Nolan"
        },
        {
            "title": "Pulp Fiction",
            "context": "Title: Pulp Fiction\nPlot: The lives of two mob hitmen, a boxer, and a gangster's wife intertwine...\nCast: John Travolta, Samuel L. Jackson\nYear: 1994\nDirector: Quentin Tarantino"
        },
    ])


@st.cache_resource
def load_movie_data():
    """Load the movie table used for retrieval.

    Tries the "movie_rationales" dataset first. If anything in that path
    fails (download, unexpected columns, empty table), a warning is shown
    and a three-movie hand-written table is returned instead.

    Returns:
        A DataFrame with at least the columns 'title' and 'context'. At most
        1000 randomly sampled rows are returned from the dataset path.
    """
    max_rows = 1000
    try:
        # NOTE(review): `load_dataset` is called without a `split`, so it
        # presumably returns a mapping of splits rather than a single table.
        # Confirm that `pd.DataFrame(dataset)` really exposes 'title' and
        # 'text' columns; if it does not, this branch always ends in the
        # fallback below.
        dataset = load_dataset("movie_rationales")
        df = pd.DataFrame(dataset)

        # Build the retrieval text: readable title plus the first 500 chars.
        df['title'] = df['title'].apply(lambda x: x.replace("_", " "))
        df['context'] = "Title: " + df['title'] + "\nContent: " + df['text'].str[:500] + "..."

        if df.empty:
            # An empty table cannot be embedded or searched; use the fallback.
            raise ValueError("dataset is empty")

        # DataFrame.sample raises ValueError when n exceeds the row count, so
        # cap the sample size instead of discarding a small but valid dataset.
        return df.sample(min(max_rows, len(df)))

    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to demo data.
        st.warning(f"Couldn't load dataset: {str(e)}. Using synthetic data.")
        return _fallback_movies()

@st.cache_resource
def setup_retrieval(df):
    """Embed every row's 'context' text and index the vectors for search.

    Args:
        df: Table with a 'context' column of strings.

    Returns:
        A ``(embedder, index)`` pair: the SentenceTransformer model and a
        FAISS ``IndexFlatL2`` holding one vector per row of ``df``, added in
        row order.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    corpus = df['context'].tolist()
    vectors = model.encode(corpus)

    # The index dimensionality is taken from the encoded vectors themselves.
    dimension = vectors.shape[1]
    store = faiss.IndexFlatL2(dimension)
    store.add(vectors)
    return model, store

# --------------------------
# Groq API Functions
# --------------------------
def get_groq_response(query, context):
    """Ask the Groq chat model to answer *query* using *context*.

    The API key is read only from the ``GROQ_API_KEY`` environment variable.
    Credentials must never be embedded in source as a fallback.

    Args:
        query: The user's question, inserted verbatim into the prompt.
        context: Retrieved source text, inserted verbatim into the prompt.

    Returns:
        The content of the first completion choice. On any failure
        (missing key, client or API error) a string beginning with
        ``"Error getting response: "`` is returned instead of raising.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return "Error getting response: GROQ_API_KEY environment variable is not set"

    prompt = f"""You are a film expert analyzing this question:
        
        Question: {query}
        
        Using these verified sources:
        {context}
        
        Provide a detailed response with:
        1. 🎬 Direct Answer
        2. 📖 Explanation
        3. 🎥 Relevant Scenes
        4. 🏆 Awards/Trivia (if available)
        """

    try:
        client = Groq(api_key=api_key)
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192",
            temperature=0.3
        )
        return response.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the failure as text rather than crash the page.
        return f"Error getting response: {str(e)}"

# --------------------------
# Main Application
# --------------------------
def main():
    """Render the CineMaster AI page and answer one question per button press.

    Loads the movie table and retrieval index, draws the header and sidebar,
    then, when the button is pressed with a non-blank question, retrieves up
    to three nearest contexts, asks the language model, and shows the answer
    together with the source contexts.
    """
    # Local import keeps this function self-contained; used to escape text
    # before it is placed inside raw HTML.
    import html

    # Load data and models
    df = load_movie_data()
    embedder, index = setup_retrieval(df)

    # Header Section
    st.markdown("""
    <div class="header">
        <h1>🎞️ CineMaster AI</h1>
        <h3>Your Personal Movie Encyclopedia</h3>
    </div>
    """, unsafe_allow_html=True)

    # Sidebar
    with st.sidebar:
        st.image("https://cdn-icons-png.flaticon.com/512/2598/2598702.png", width=120)
        st.subheader("Sample Questions")
        examples = [
            "Who played the Joker in The Dark Knight?",
            "Explain the ending of Inception",
            "List Tarantino's movies",
            "What's the plot of Pulp Fiction?",
            "Who directed The Dark Knight?"
        ]
        for ex in examples:
            st.code(ex, language="bash")

        st.markdown("---")
        st.markdown("**Database Info**")
        st.write(f"📊 {len(df)} movies loaded")
        st.write("🔍 Using FAISS for vector search")
        st.write("🤖 Powered by Llama 3 70B")

    # Main Interface
    query = st.text_input("🎯 Ask any movie question:",
                        placeholder="e.g., 'Who played the villain in The Dark Knight?'")

    if not st.button("🚀 Get Expert Analysis", type="primary"):
        return

    # Treat whitespace-only input the same as an empty question.
    question = query.strip() if query else ""
    if not question:
        st.warning("Please enter a movie-related question")
        return

    with st.spinner("🔍 Searching through movie database..."):
        query_embed = embedder.encode([question])
        # Never ask for more neighbours than there are rows. FAISS pads
        # missing results with -1, which `iloc` would silently map to the
        # last row, so negative labels are also filtered out.
        top_k = min(3, len(df))
        _, indices = index.search(query_embed, top_k)
        contexts = [df.iloc[i]['context'] for i in indices[0] if i >= 0]
        combined_context = "\n\n---\n\n".join(contexts)

    with st.spinner("🎥 Generating cinematic insights..."):
        answer = get_groq_response(question, combined_context)

    st.markdown("---")
    with st.container():
        st.markdown("## 🎬 Expert Analysis")
        # Model output and dataset text are untrusted; escape them before
        # embedding in HTML that is rendered with unsafe_allow_html=True.
        safe_answer = html.escape(answer, quote=False)
        st.markdown(f'<div class="response-box">{safe_answer}</div>', unsafe_allow_html=True)

        st.markdown("## 📚 Reference Materials")
        for i, ctx in enumerate(contexts, 1):
            # Only the first source starts expanded.
            with st.expander(f"Source {i}", expanded=(i == 1)):
                safe_ctx = html.escape(ctx, quote=False)
                st.markdown(f'<div class="movie-card">{safe_ctx}</div>', unsafe_allow_html=True)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()