"""Streamlit app for batch analysis of movie reviews.

The user uploads a CSV with a ``comment`` column. Each review is classified
as positive/negative, an overall star rating is derived from the share of
positive reviews, and keyphrases plus a short summary are produced from the
combined review text.
"""

import html
import os
import tempfile

import pandas as pd
import streamlit as st
from transformers import pipeline

# Number of keyphrases shown on the page (one per column).
TOP_KEYPHRASE_COUNT = 5

# Character cap applied to the combined review text before summarization.
# NOTE(review): the summarization model's limit is expressed in tokens, not
# characters; slicing by characters is a conservative approximation.
SUMMARY_INPUT_CHAR_LIMIT = 1024

# Each markdown element sits on its own line so the numbered list renders as
# a list instead of being folded into the heading.
INSTRUCTIONS_MD = """
### Instructions:
1. Upload a CSV file containing movie reviews (must include a 'comment' column)
2. The system will automatically analyze the sentiment of each review
3. Generate overall ratings, keyphrase extraction, and summary reports
"""


def calculate_star_rating(positive_percent):
    """Convert positive percentage to star rating.

    Args:
        positive_percent: Share of positive reviews, on a 0-100 scale.

    Returns:
        An integer from 1 to 5: 5 for >= 80, 4 for >= 60, 3 for >= 40,
        2 for >= 20, otherwise 1.
    """
    if positive_percent >= 80:
        return 5
    if positive_percent >= 60:
        return 4
    if positive_percent >= 40:
        return 3
    if positive_percent >= 20:
        return 2
    return 1


@st.cache_resource(show_spinner=False)
def _load_models():
    """Build the sentiment, keyphrase and summarization pipelines.

    Cached with ``st.cache_resource`` because Streamlit re-executes the whole
    script on every widget interaction; without caching all three models
    would be re-created on each rerun.

    Returns:
        A ``(classifier, keyphrase_extractor, summarizer)`` tuple.
    """
    # Sentiment analysis model
    classifier = pipeline(
        "text-classification",
        model="KeonBlackwell/movie_sentiment_model",
        tokenizer="distilbert-base-uncased",
    )
    # Keyphrase extraction model
    keyphrase_extractor = pipeline(
        "token-classification",
        model="ml6team/keyphrase-extraction-distilbert-inspec",
        aggregation_strategy="simple",
    )
    # Summarization model
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return classifier, keyphrase_extractor, summarizer


def _clean_comments(column):
    """Return the usable review texts from a DataFrame column.

    Missing cells are dropped and remaining values are converted to ``str``
    so that numeric or NaN cells cannot break the classifier call or the
    later ``" ".join``. Whitespace-only entries are skipped.
    """
    return [text for text in column.dropna().astype(str) if text.strip()]


def _top_keyphrases(entities, limit=TOP_KEYPHRASE_COUNT):
    """Pick the highest-scoring distinct keyphrases.

    Args:
        entities: Items with ``'word'`` and ``'score'`` keys, as returned by
            the keyphrase pipeline.
        limit: Maximum number of phrases to return.

    Returns:
        Up to ``limit`` entities sorted by descending score. Phrases that
        differ only in case or surrounding whitespace are treated as the
        same phrase and only the best-scoring occurrence is kept.
    """
    best_by_phrase = {}
    for entity in entities:
        key = str(entity['word']).strip().lower()
        if not key:
            continue
        current = best_by_phrase.get(key)
        if current is None or entity['score'] > current['score']:
            best_by_phrase[key] = entity
    ranked = sorted(
        best_by_phrase.values(), key=lambda item: item['score'], reverse=True
    )
    return ranked[:limit]


def _analyze_sentiment(classifier, comments, progress_bar, status_text):
    """Classify every comment and report progress in the UI.

    Returns:
        A DataFrame with ``comment``, ``sentiment`` (1 when the predicted
        label is ``LABEL_1``, else 0) and ``confidence`` columns.
    """
    total = len(comments)
    results = []
    for position, comment in enumerate(comments, start=1):
        progress_bar.progress(position / total)
        status_text.text(f"Analyzing sentiment for {position}/{total} reviews...")
        # truncation=True keeps reviews longer than the model's input limit
        # from raising inside the pipeline.
        prediction = classifier(comment, truncation=True)[0]
        results.append({
            'comment': comment,
            'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
            'confidence': prediction['score'],
        })
    return pd.DataFrame(results)


def _render_sentiment_overview(result_df):
    """Show the overall rating metrics and a sample of per-review results."""
    positive_count = int(result_df['sentiment'].sum())
    total_reviews = len(result_df)
    positive_percent = float(positive_count / total_reviews * 100)
    star_rating = calculate_star_rating(positive_percent)

    st.success("Sentiment analysis completed!")

    # Rating display
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("⭐ Overall Rating", f"{star_rating} Stars")
    with col2:
        st.metric("👍 Positive Reviews", f"{positive_count}/{total_reviews}")
    with col3:
        st.metric("📈 Positive Ratio", f"{positive_percent:.1f}%")

    # Progress bar visualization
    st.progress(positive_percent / 100)

    # Show example results
    with st.expander("View Detailed Analysis Results (First 10 Rows)"):
        st.dataframe(result_df.head(10))


def _render_keyphrases_and_summary(keyphrase_extractor, summarizer, comments):
    """Extract keyphrases and a summary from the combined review text."""
    st.subheader("📌 Keyphrase Extraction and Summary of Reviews")

    # Combine all comments into a single text
    combined_text = " ".join(comments)

    # Keyphrase extraction
    with st.spinner("Extracting keyphrases..."):
        keyphrases = keyphrase_extractor(combined_text)
        top_keyphrases = _top_keyphrases(keyphrases)

    # Show keyphrases
    st.markdown("**🔍 Extracted Keyphrases:**")
    cols = st.columns(TOP_KEYPHRASE_COUNT)
    for column, phrase in zip(cols, top_keyphrases):
        # The phrase originates from the uploaded file and is rendered with
        # unsafe_allow_html=True, so it must be escaped first.
        safe_word = html.escape(str(phrase['word']))
        column.markdown(
            f"\n{safe_word}\nConfidence: {phrase['score']:.2f}\n",
            unsafe_allow_html=True,
        )

    # Generate summary
    with st.spinner("Generating review summary..."):
        # Limit text length to avoid model limitations
        summary_input = combined_text[:SUMMARY_INPUT_CHAR_LIMIT]
        summary = summarizer(
            summary_input, max_length=130, min_length=30, do_sample=False
        )

    # Show summary
    st.markdown("**📝 Review Summary:**")
    st.info(summary[0]['summary_text'])


def main():
    """Render the page and run the upload -> analyze -> report workflow."""
    st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")

    # Custom styles
    st.markdown(""" """, unsafe_allow_html=True)

    # Model loading
    with st.spinner("Loading all models, this may take a few minutes..."):
        try:
            classifier, keyphrase_extractor, summarizer = _load_models()
        except Exception as e:
            st.error(f"Model loading failed: {str(e)}")
            return

    # Page layout
    st.title("🎬 Movie Review Batch Analysis System")
    st.markdown(INSTRUCTIONS_MD)

    # File upload
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # Read data
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"File reading failed: {str(e)}")
        return
    if 'comment' not in df.columns:
        st.error("The CSV file must contain a 'comment' column")
        return

    comments = _clean_comments(df['comment'])
    if not comments:
        # Without this guard an empty column surfaces later as an opaque
        # KeyError on the (column-less) result DataFrame.
        st.error("The 'comment' column does not contain any non-empty reviews")
        return

    # Show preview
    with st.expander("Preview of Original Data (First 5 Rows)"):
        st.dataframe(df.head())

    if not st.button("Start Analysis"):
        return

    # Progress bar settings
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        result_df = _analyze_sentiment(
            classifier, comments, progress_bar, status_text
        )
        _render_sentiment_overview(result_df)
        _render_keyphrases_and_summary(keyphrase_extractor, summarizer, comments)

        # Generate downloadable file. The CSV is built in memory: no temp
        # file to leak on error, and no reopening of a still-open file
        # (which fails on Windows).
        # NOTE(review): clicking the download button triggers a rerun in
        # which "Start Analysis" is no longer pressed, so the results above
        # will likely disappear - confirm whether that is acceptable.
        csv_bytes = result_df.to_csv(index=False).encode("utf-8")
        st.download_button(
            label="Download Full Results",
            data=csv_bytes,
            file_name="analysis_results.csv",
            mime="text/csv",
        )
    except Exception as e:
        st.error(f"An error occurred during analysis: {str(e)}")
    finally:
        progress_bar.empty()
        status_text.empty()


if __name__ == "__main__":
    main()