test2025SpL2

Sleeping

test2025SpL2

File size: 7,475 Bytes

9322a5c
244d99e
8987036
244d99e
 
 
 
fda07e6
244d99e
 
 
 
 
 
 
 
 
 
8533608
c3c7832
fda07e6
 
 
244d99e
 
 
 
 
 
 
 
 
 
 
6878db4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fda07e6
 
244d99e
fda07e6
 
 
 
244d99e
c5302a0
fda07e6
 
 
244d99e
fda07e6
244d99e
 
 
fda07e6
244d99e
fda07e6
244d99e
 
fda07e6
244d99e
267ce0e
fda07e6
 
244d99e
267ce0e
fda07e6
 
244d99e
 
fda07e6
6878db4
 
 
 
244d99e
6878db4
 
 
 
 
 
 
 
 
 
 
 
 
fda07e6
244d99e
fda07e6
 
244d99e
 
 
 
fda07e6
 
 
 
 
244d99e
 
fda07e6
244d99e
fda07e6
244d99e
fda07e6
 
 
244d99e
fda07e6
 
 
244d99e
fda07e6
 
 
 
 
40ac14d
fda07e6
 
6878db4
 
 
 
fda07e6
 
 
244d99e
 
 
 
 
 
 
 
 
4d65165
244d99e
 
fda07e6
244d99e
 
fda07e6
 
6878db4
 
 
 
 
 
 
 
 
 
fda07e6
 
 
6878db4
fda07e6
 
244d99e
 
 
 
fda07e6
244d99e
 
 
 
 
fda07e6
244d99e
fda07e6
244d99e
 
 
149b30a

import html
import os
import tempfile

import pandas as pd
import streamlit as st
from transformers import pipeline

def calculate_star_rating(positive_percent):
    """Map a positive-review percentage onto a 1-5 star score.

    Each 20-point band earns one more star: values of 80 and above give
    5 stars, 60 and above give 4, 40 and above give 3, 20 and above give 2,
    and anything lower gives 1.
    """
    # (lowest percentage in the band, stars awarded), highest band first.
    bands = ((80, 5), (60, 4), (40, 3), (20, 2))
    for floor, stars in bands:
        if positive_percent >= floor:
            return stars
    # Below every band: the minimum rating.
    return 1

def _load_models():
    """Build the three ``transformers`` pipelines used by the app.

    Returns:
        A ``(classifier, keyphrase_extractor, summarizer)`` tuple.
    """
    # Sentiment analysis model
    classifier = pipeline(
        "text-classification",
        model="KeonBlackwell/movie_sentiment_model",
        tokenizer="distilbert-base-uncased"
    )

    # Keyphrase extraction model
    keyphrase_extractor = pipeline(
        "token-classification",
        model="ml6team/keyphrase-extraction-distilbert-inspec",
        aggregation_strategy="simple"
    )

    # Summarization model
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    return classifier, keyphrase_extractor, summarizer


def _clean_comments(df):
    """Return the usable entries of ``df['comment']`` as a list of strings.

    Missing cells are dropped, remaining values are converted to ``str``,
    and whitespace-only entries are skipped, so every returned item can be
    passed to the models and to ``str.join``.
    """
    texts = df['comment'].dropna().astype(str)
    return [text for text in texts if text.strip()]


def main():
    """Render the Streamlit page and run the review analysis on demand.

    The page loads the models, lets the user upload a CSV with a 'comment'
    column, and, once "Start Analysis" is pressed, shows per-review
    sentiment, an overall star rating, the top keyphrases, a summary of the
    reviews, and a button to download the per-review results as CSV.

    Returns:
        None. Problems (model loading, file reading, analysis) are reported
        on the page with ``st.error`` / ``st.warning`` instead of being raised.
    """
    st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")

    # Custom styles
    st.markdown("""
    <style>
    .reportview-container {
        background: #f0f2f6;
    }
    .stProgress > div > div > div > div {
        background-color: #4CAF50;
    }
    </style>
    """, unsafe_allow_html=True)

    # Model loading
    with st.spinner("Loading all models, this may take a few minutes..."):
        try:
            # Streamlit re-runs this script on every widget interaction, so
            # cache the pipelines when the installed version supports it;
            # otherwise fall back to loading them directly.
            cache_resource = getattr(st, "cache_resource", None)
            if cache_resource is not None:
                load_models = cache_resource(show_spinner=False)(_load_models)
            else:
                load_models = _load_models
            classifier, keyphrase_extractor, summarizer = load_models()
        except Exception as e:
            st.error(f"Model loading failed: {str(e)}")
            return

    # Page layout
    st.title("🎬 Movie Review Batch Analysis System")
    st.markdown("""
    ### Instructions:
    1. Upload a CSV file containing movie reviews (must include a 'comment' column)
    2. The system will automatically analyze the sentiment of each review
    3. Generate overall ratings, keyphrase extraction, and summary reports
    """)

    # File upload
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # Read data
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"File reading failed: {str(e)}")
        return

    if 'comment' not in df.columns:
        st.error("The CSV file must contain a 'comment' column")
        return

    # Show preview
    with st.expander("Preview of Original Data (First 5 Rows)"):
        st.dataframe(df.head())

    # Empty / missing cells would break the models and the text join below.
    comments = _clean_comments(df)
    if not comments:
        st.warning("The 'comment' column does not contain any non-empty reviews")
        return

    if not st.button("Start Analysis"):
        return

    # Progress bar settings
    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    total = len(comments)

    try:
        # Sentiment analysis
        for i, comment in enumerate(comments):
            progress_bar.progress((i + 1) / total)
            status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...")

            # truncation=True keeps reviews longer than the model's input
            # limit from aborting the whole batch.
            prediction = classifier(comment, truncation=True)[0]
            results.append({
                'comment': comment,
                'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
                'confidence': prediction['score']
            })

        # Convert to DataFrame
        result_df = pd.DataFrame(results)

        # Calculate statistics
        positive_count = int(result_df['sentiment'].sum())
        total_reviews = len(result_df)
        positive_percent = (positive_count / total_reviews) * 100
        star_rating = calculate_star_rating(positive_percent)

        # Show results
        st.success("Sentiment analysis completed!")

        # Rating display
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("⭐ Overall Rating", f"{star_rating} Stars")
        with col2:
            st.metric("👍 Positive Reviews", f"{positive_count}/{total_reviews}")
        with col3:
            st.metric("📈 Positive Ratio", f"{positive_percent:.1f}%")

        # Progress bar visualization
        st.progress(positive_percent / 100)

        # Show example results
        with st.expander("View Detailed Analysis Results (First 10 Rows)"):
            st.dataframe(result_df.head(10))

        # Keyphrase extraction and summary
        st.subheader("📌 Keyphrase Extraction and Summary of Reviews")

        # Combine all comments into a single text
        combined_text = " ".join(comments)

        # Keyphrase extraction
        with st.spinner("Extracting keyphrases..."):
            keyphrases = keyphrase_extractor(combined_text)
            # Sort by confidence and take the top 5
            top_keyphrases = sorted(
                keyphrases, key=lambda x: x['score'], reverse=True
            )[:5]

        # Show keyphrases
        st.markdown("**🔍 Extracted Keyphrases:**")
        for column, phrase in zip(st.columns(5), top_keyphrases):
            # The phrase text originates from the uploaded file, so escape it
            # before embedding it in raw HTML.
            safe_word = html.escape(str(phrase['word']))
            column.markdown(f"""
                    <div style="
                        border: 1px solid #ddd;
                        border-radius: 5px;
                        padding: 10px;
                        text-align: center;
                        margin: 5px;
                        background-color: #add8e6;
                    ">
                        <b>{safe_word}</b><br>
                        <small>Confidence: {phrase['score']:.2f}</small>
                    </div>
                    """, unsafe_allow_html=True)

        # Generate summary
        with st.spinner("Generating review summary..."):
            # Cap the text by characters to keep the request small; the
            # character cap does not bound the token count, so also ask the
            # pipeline to truncate at the model's own input limit.
            max_chars = 1024
            summary = summarizer(combined_text[:max_chars],
                                 max_length=130,
                                 min_length=30,
                                 do_sample=False,
                                 truncation=True)

        # Show summary
        st.markdown("**📝 Review Summary:**")
        st.info(summary[0]['summary_text'])

        # Generate downloadable file in memory; no temporary file is needed
        # and nothing is left on disk if an error occurs.
        st.download_button(
            label="Download Full Results",
            data=result_df.to_csv(index=False).encode("utf-8"),
            file_name="analysis_results.csv",
            mime="text/csv"
        )

    except Exception as e:
        st.error(f"An error occurred during analysis: {str(e)}")
    finally:
        progress_bar.empty()
        status_text.empty()

# Script entry point: run the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()