File size: 7,475 Bytes
9322a5c
244d99e
8987036
244d99e
 
 
 
fda07e6
244d99e
 
 
 
 
 
 
 
 
 
8533608
c3c7832
fda07e6
 
 
244d99e
 
 
 
 
 
 
 
 
 
 
6878db4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fda07e6
 
244d99e
fda07e6
 
 
 
244d99e
c5302a0
fda07e6
 
 
244d99e
fda07e6
244d99e
 
 
fda07e6
244d99e
fda07e6
244d99e
 
fda07e6
244d99e
267ce0e
fda07e6
 
244d99e
267ce0e
fda07e6
 
244d99e
 
fda07e6
6878db4
 
 
 
244d99e
6878db4
 
 
 
 
 
 
 
 
 
 
 
 
fda07e6
244d99e
fda07e6
 
244d99e
 
 
 
fda07e6
 
 
 
 
244d99e
 
fda07e6
244d99e
fda07e6
244d99e
fda07e6
 
 
244d99e
fda07e6
 
 
244d99e
fda07e6
 
 
 
 
40ac14d
fda07e6
 
6878db4
 
 
 
fda07e6
 
 
244d99e
 
 
 
 
 
 
 
 
4d65165
244d99e
 
fda07e6
244d99e
 
fda07e6
 
6878db4
 
 
 
 
 
 
 
 
 
fda07e6
 
 
6878db4
fda07e6
 
244d99e
 
 
 
fda07e6
244d99e
 
 
 
 
fda07e6
244d99e
fda07e6
244d99e
 
 
149b30a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import streamlit as st
import pandas as pd
from transformers import pipeline
import tempfile
import os

def calculate_star_rating(positive_percent):
    """Map a positive-review percentage onto a 1-5 star score.

    Every 20-point band is worth one more star: anything below 20 earns
    1 star, 20 up to (but excluding) 40 earns 2, and so on, with 80 and
    above earning the full 5 stars.
    """
    # (lower bound of the band, stars awarded), checked from the top down
    bands = ((80, 5), (60, 4), (40, 3), (20, 2))
    for lower_bound, stars in bands:
        if positive_percent >= lower_bound:
            return stars
    # Nothing matched: the value is below the lowest band
    return 1

def _load_models():
    """Build the three Hugging Face pipelines used by the app.

    Returns:
        A ``(classifier, keyphrase_extractor, summarizer)`` tuple.
    """
    # Sentiment analysis model
    classifier = pipeline(
        "text-classification",
        model="KeonBlackwell/movie_sentiment_model",
        tokenizer="distilbert-base-uncased",
    )

    # Keyphrase extraction model
    keyphrase_extractor = pipeline(
        "token-classification",
        model="ml6team/keyphrase-extraction-distilbert-inspec",
        aggregation_strategy="simple",
    )

    # Summarization model
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    return classifier, keyphrase_extractor, summarizer


def _extract_comments(df):
    """Return the usable review texts from the 'comment' column.

    Missing cells are dropped, remaining values are converted to ``str`` and
    stripped, and blank strings are discarded, so every returned item can be
    passed to the models and to ``str.join``.
    """
    texts = df['comment'].dropna().astype(str).str.strip()
    return texts[texts != ""].tolist()


def _classify_comments(classifier, comments, progress_bar, status_text):
    """Run sentiment analysis on each comment, updating the progress widgets.

    Args:
        classifier: The text-classification pipeline.
        comments: Non-empty list of review strings.
        progress_bar: Streamlit progress element to advance per review.
        status_text: Streamlit placeholder for the textual status line.

    Returns:
        A DataFrame with 'comment', 'sentiment' (1 when the predicted label
        is 'LABEL_1', otherwise 0) and 'confidence' columns.
    """
    total = len(comments)
    rows = []
    for position, comment in enumerate(comments, start=1):
        progress_bar.progress(position / total)
        status_text.text(f"Analyzing sentiment for {position}/{total} reviews...")

        # truncation=True keeps over-long reviews from exceeding the model's
        # maximum input length instead of raising mid-batch.
        prediction = classifier(comment, truncation=True)[0]
        rows.append({
            'comment': comment,
            'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
            'confidence': prediction['score']
        })
    return pd.DataFrame(rows)


def _keyphrase_card_html(word, score):
    """Return the HTML card for one keyphrase.

    The phrase originates from the uploaded file and the card is rendered
    with ``unsafe_allow_html=True``, so the text is HTML-escaped first.
    """
    import html

    return f"""
    <div style="
        border: 1px solid #ddd;
        border-radius: 5px;
        padding: 10px;
        text-align: center;
        margin: 5px;
        background-color: #add8e6;
    ">
        <b>{html.escape(str(word))}</b><br>
        <small>Confidence: {score:.2f}</small>
    </div>
    """


def main():
    """Render the Streamlit page and run the review-analysis workflow.

    The page loads the models, accepts a CSV upload that must contain a
    'comment' column, and, once "Start Analysis" is pressed, shows the
    sentiment statistics and star rating, the top keyphrases, a summary of
    the reviews and a download button for the per-review results. Failures
    are reported on the page via ``st.error`` rather than raised.
    """
    st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")

    # Custom styles
    st.markdown("""
    <style>
    .reportview-container {
        background: #f0f2f6;
    }
    .stProgress > div > div > div > div {
        background-color: #4CAF50;
    }
    </style>
    """, unsafe_allow_html=True)

    # Model loading
    with st.spinner("Loading all models, this may take a few minutes..."):
        try:
            # Streamlit re-executes the script on every interaction; caching
            # the pipelines as a resource avoids rebuilding them each time.
            load_models = st.cache_resource(show_spinner=False)(_load_models)
            classifier, keyphrase_extractor, summarizer = load_models()
        except Exception as e:
            st.error(f"Model loading failed: {str(e)}")
            return

    # Page layout
    st.title("🎬 Movie Review Batch Analysis System")
    st.markdown("""
    ### Instructions:
    1. Upload a CSV file containing movie reviews (must include a 'comment' column)
    2. The system will automatically analyze the sentiment of each review
    3. Generate overall ratings, keyphrase extraction, and summary reports
    """)

    # File upload
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # Read data
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"File reading failed: {str(e)}")
        return

    if 'comment' not in df.columns:
        st.error("The CSV file must contain a 'comment' column")
        return

    comments = _extract_comments(df)
    if not comments:
        # Without this guard the statistics below would fail on an empty frame.
        st.error("The 'comment' column does not contain any reviews to analyze")
        return

    # Show preview
    with st.expander("Preview of Original Data (First 5 Rows)"):
        st.dataframe(df.head())

    if not st.button("Start Analysis"):
        return

    # Progress bar settings
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # Sentiment analysis
        result_df = _classify_comments(classifier, comments, progress_bar, status_text)

        # Calculate statistics
        positive_count = int(result_df['sentiment'].sum())
        total_reviews = len(result_df)
        positive_percent = (positive_count / total_reviews) * 100
        star_rating = calculate_star_rating(positive_percent)

        # Show results
        st.success("Sentiment analysis completed!")

        # Rating display
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("⭐ Overall Rating", f"{star_rating} Stars")
        with col2:
            st.metric("👍 Positive Reviews", f"{positive_count}/{total_reviews}")
        with col3:
            st.metric("📈 Positive Ratio", f"{positive_percent:.1f}%")

        # Progress bar visualization
        st.progress(positive_percent / 100)

        # Show example results
        with st.expander("View Detailed Analysis Results (First 10 Rows)"):
            st.dataframe(result_df.head(10))

        # Keyphrase extraction and summary
        st.subheader("📌 Keyphrase Extraction and Summary of Reviews")

        # Combine all comments into a single text
        combined_text = " ".join(comments)

        # Keyphrase extraction
        with st.spinner("Extracting keyphrases..."):
            keyphrases = keyphrase_extractor(combined_text)
            # Sort by confidence and take the top 5
            top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:5]

        # Show keyphrases, one card per column (fewer than 5 leaves columns empty)
        st.markdown("**🔍 Extracted Keyphrases:**")
        for column, phrase in zip(st.columns(5), top_keyphrases):
            column.markdown(
                _keyphrase_card_html(phrase['word'], phrase['score']),
                unsafe_allow_html=True,
            )

        # Generate summary
        with st.spinner("Generating review summary..."):
            # Cap the input at 1024 characters to keep it short for the model.
            # NOTE(review): the model's limit is presumably counted in tokens,
            # not characters, so truncation=True is passed as a second guard.
            max_length = 1024
            summary = summarizer(combined_text[:max_length],
                                 max_length=130,
                                 min_length=30,
                                 do_sample=False,
                                 truncation=True)

        # Show summary
        st.markdown("**📝 Review Summary:**")
        st.info(summary[0]['summary_text'])

        # Offer the full results as an in-memory CSV; no temporary file is
        # needed, so nothing is left on disk if a later step fails.
        st.download_button(
            label="Download Full Results",
            data=result_df.to_csv(index=False).encode("utf-8"),
            file_name="analysis_results.csv",
            mime="text/csv"
        )

    except Exception as e:
        st.error(f"An error occurred during analysis: {str(e)}")
    finally:
        progress_bar.empty()
        status_text.empty()

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()