import streamlit as st import pandas as pd from transformers import pipeline import tempfile import os def calculate_star_rating(positive_percent): """Convert positive percentage to star rating""" if positive_percent >= 80: return 5 elif positive_percent >= 60: return 4 elif positive_percent >= 40: return 3 elif positive_percent >= 20: return 2 else: return 1 def main(): st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬") # Custom styles st.markdown(""" """, unsafe_allow_html=True) # Model loading with st.spinner("Loading all models, this may take a few minutes..."): try: # Sentiment analysis model classifier = pipeline( "text-classification", model="KeonBlackwell/movie_sentiment_model", tokenizer="distilbert-base-uncased" ) # Keyphrase extraction model keyphrase_extractor = pipeline( "token-classification", model="ml6team/keyphrase-extraction-distilbert-inspec", aggregation_strategy="simple" ) # Summarization model summarizer = pipeline("summarization", model="facebook/bart-large-cnn") except Exception as e: st.error(f"Model loading failed: {str(e)}") return # Page layout st.title("🎬 Movie Review Batch Analysis System") st.markdown(""" ### Instructions: 1. Upload a CSV file containing movie reviews (must include a 'comment' column) 2. The system will automatically analyze the sentiment of each review 3. Generate overall ratings, keyphrase extraction, and summary reports """) # File upload uploaded_file = st.file_uploader("Upload CSV file", type=["csv"]) if uploaded_file is not None: # Read data try: df = pd.read_csv(uploaded_file) if 'comment' not in df.columns: st.error("The CSV file must contain a 'comment' column") return comments = df['comment'].tolist() except Exception as e: st.error(f"File reading failed: {str(e)}") return # Show preview with st.expander("Preview of Original Data (First 5 Rows)"): st.dataframe(df.head()) if st.button("Start Analysis"): # Progress bar settings progress_bar = st.progress(0) status_text = st.empty() results = [] total = len(comments) # Batch prediction try: # Sentiment analysis for i, comment in enumerate(comments): progress = (i+1)/total progress_bar.progress(progress) status_text.text(f"Analyzing sentiment for {i+1}/{total} reviews...") prediction = classifier(comment)[0] results.append({ 'comment': comment, 'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0, 'confidence': prediction['score'] }) # Convert to DataFrame result_df = pd.DataFrame(results) # Calculate statistics positive_count = result_df['sentiment'].sum() total_reviews = len(result_df) positive_percent = (positive_count / total_reviews) * 100 star_rating = calculate_star_rating(positive_percent) # Show results st.success("Sentiment analysis completed!") # Rating display col1, col2, col3 = st.columns(3) with col1: st.metric("⭐ Overall Rating", f"{star_rating} Stars") with col2: st.metric("👍 Positive Reviews", f"{positive_count}/{total_reviews}") with col3: st.metric("📈 Positive Ratio", f"{positive_percent:.1f}%") # Progress bar visualization st.progress(positive_percent/100) # Show example results with st.expander("View Detailed Analysis Results (First 10 Rows)"): st.dataframe(result_df.head(10)) # Keyphrase extraction and summary st.subheader("📌 Keyphrase Extraction and Summary of Reviews") # Combine all comments into a single text combined_text = " ".join(comments) # Keyphrase extraction with st.spinner("Extracting keyphrases..."): keyphrases = keyphrase_extractor(combined_text) # Sort by confidence and take the top 5 top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:5] # Show keyphrases st.markdown("**🔍 Extracted Keyphrases:**") cols = st.columns(5) for i, phrase in enumerate(top_keyphrases): cols[i].markdown(f"""