# test2025SpL2 / app.py
# Page residue kept from the hosting site: avatar of user "ysuneu".
# Last commit message: "Update app.py" (4d65165, verified).
import html
import os
import tempfile

import pandas as pd
import streamlit as st
from transformers import pipeline
def calculate_star_rating(positive_percent):
    """Map the percentage of positive reviews to a star score from 1 to 5.

    A value of at least 80 earns 5 stars, at least 60 earns 4, at least 40
    earns 3, at least 20 earns 2, and anything else earns 1.
    """
    # (minimum percentage, stars) pairs, checked from the highest band down.
    bands = ((80, 5), (60, 4), (40, 3), (20, 2))
    for floor, stars in bands:
        if positive_percent >= floor:
            return stars
    return 1
# Inline CSS injected at page start (container background, progress-bar colour).
# The content is kept flush-left so Markdown does not treat it as a code block.
_CUSTOM_CSS = """
<style>
.reportview-container {
background: #f0f2f6;
}
.stProgress > div > div > div > div {
background-color: #4CAF50;
}
</style>
"""

# Usage instructions rendered under the page title.
_INSTRUCTIONS_MD = """
### Instructions:
1. Upload a CSV file containing movie reviews (must include a 'comment' column)
2. The system will automatically analyze the sentiment of each review
3. Generate overall ratings, keyphrase extraction, and summary reports
"""

# Cap, in characters, on the combined text handed to the summarizer.
_SUMMARY_INPUT_CHARS = 1024

# Number of keyphrase cards (and layout columns) shown.
_TOP_KEYPHRASES = 5

# Inline style shared by every keyphrase card.
_CARD_STYLE = (
    "border: 1px solid #ddd; border-radius: 5px; padding: 10px; "
    "text-align: center; margin: 5px; background-color: #add8e6;"
)


def _load_models():
    """Build the sentiment, keyphrase and summarization pipelines.

    Returns:
        A ``(classifier, keyphrase_extractor, summarizer)`` tuple.
    """
    classifier = pipeline(
        "text-classification",
        model="KeonBlackwell/movie_sentiment_model",
        tokenizer="distilbert-base-uncased",
    )
    keyphrase_extractor = pipeline(
        "token-classification",
        model="ml6team/keyphrase-extraction-distilbert-inspec",
        aggregation_strategy="simple",
    )
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return classifier, keyphrase_extractor, summarizer


def _extract_comments(df):
    """Return the usable review texts from the ``comment`` column.

    Missing cells are dropped, remaining values are converted to ``str`` and
    blank strings are skipped, so every returned item can be passed to the
    models and to ``str.join`` safely.
    """
    texts = df["comment"].dropna().astype(str)
    return [text for text in texts if text.strip()]


def _keyphrase_card_html(word, score):
    """Return the HTML card for one keyphrase, with the phrase text escaped."""
    # The phrase originates from the uploaded file, so it must not be able to
    # inject markup into a block rendered with unsafe_allow_html=True.
    safe_word = html.escape(str(word))
    return (
        f'<div style="{_CARD_STYLE}">'
        f"<b>{safe_word}</b><br>"
        f"<small>Confidence: {score:.2f}</small>"
        "</div>"
    )


def _render_keyphrases(keyphrases):
    """Show the highest-scoring keyphrases as a row of cards."""
    top_keyphrases = sorted(
        keyphrases, key=lambda item: item["score"], reverse=True
    )[:_TOP_KEYPHRASES]
    st.markdown("**🔍 Extracted Keyphrases:**")
    if not top_keyphrases:
        st.info("No keyphrases were found in the reviews.")
        return
    # Always lay out the full set of columns so card width stays constant
    # even when fewer phrases are available.
    for column, phrase in zip(st.columns(_TOP_KEYPHRASES), top_keyphrases):
        column.markdown(
            _keyphrase_card_html(phrase["word"], phrase["score"]),
            unsafe_allow_html=True,
        )


def main():
    """Run the Streamlit page for batch movie-review analysis.

    Loads the models, lets the user upload a CSV with a ``comment`` column,
    and on request classifies every review, shows the aggregate rating,
    extracts keyphrases, summarizes the reviews and offers the per-review
    results as a CSV download. All failures are reported in the page via
    ``st.error``; nothing is raised to the caller.
    """
    st.set_page_config(page_title="Movie Review Analysis System", page_icon="🎬")
    st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

    # Model loading. The loader is wrapped in st.cache_resource so the
    # pipelines are built once and reused across script reruns instead of
    # being reloaded on every widget interaction.
    with st.spinner("Loading all models, this may take a few minutes..."):
        try:
            load_models = st.cache_resource(show_spinner=False)(_load_models)
            classifier, keyphrase_extractor, summarizer = load_models()
        except Exception as e:
            st.error(f"Model loading failed: {str(e)}")
            return

    # Page layout
    st.title("🎬 Movie Review Batch Analysis System")
    st.markdown(_INSTRUCTIONS_MD)

    # File upload
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # Read data
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"File reading failed: {str(e)}")
        return
    if 'comment' not in df.columns:
        st.error("The CSV file must contain a 'comment' column")
        return
    comments = _extract_comments(df)

    # Show preview
    with st.expander("Preview of Original Data (First 5 Rows)"):
        st.dataframe(df.head())

    # Without this guard an empty column would lead to a division by zero /
    # missing-column failure further down.
    if not comments:
        st.error("The 'comment' column does not contain any reviews to analyze")
        return

    if not st.button("Start Analysis"):
        return

    # Progress bar settings
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(comments)

    try:
        # Sentiment analysis, one review at a time so progress can be shown.
        results = []
        for position, comment in enumerate(comments, start=1):
            progress_bar.progress(position / total)
            status_text.text(f"Analyzing sentiment for {position}/{total} reviews...")
            # truncation=True cuts an over-long review to the model's input
            # limit instead of letting it fail the whole batch.
            prediction = classifier(comment, truncation=True)[0]
            results.append({
                'comment': comment,
                'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
                'confidence': prediction['score'],
            })
        result_df = pd.DataFrame(results)

        # Calculate statistics
        positive_count = int(result_df['sentiment'].sum())
        total_reviews = len(result_df)
        positive_percent = positive_count / total_reviews * 100
        star_rating = calculate_star_rating(positive_percent)

        # Show results
        st.success("Sentiment analysis completed!")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("⭐ Overall Rating", f"{star_rating} Stars")
        with col2:
            st.metric("👍 Positive Reviews", f"{positive_count}/{total_reviews}")
        with col3:
            st.metric("📈 Positive Ratio", f"{positive_percent:.1f}%")
        # Positive share drawn as a bar (expects a value in [0.0, 1.0]).
        st.progress(float(positive_percent / 100))

        with st.expander("View Detailed Analysis Results (First 10 Rows)"):
            st.dataframe(result_df.head(10))

        # Keyphrase extraction and summary work on all reviews joined together.
        st.subheader("📌 Keyphrase Extraction and Summary of Reviews")
        combined_text = " ".join(comments)

        with st.spinner("Extracting keyphrases..."):
            keyphrases = keyphrase_extractor(combined_text)
        _render_keyphrases(keyphrases)

        with st.spinner("Generating review summary..."):
            # The cap is in characters, not tokens; truncation=True is the
            # safety net for text whose characters expand to several tokens.
            summary = summarizer(
                combined_text[:_SUMMARY_INPUT_CHARS],
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
        st.markdown("**📝 Review Summary:**")
        st.info(summary[0]['summary_text'])

        # Serve the CSV straight from memory: no temporary file to create,
        # reopen or clean up.
        st.download_button(
            label="Download Full Results",
            data=result_df.to_csv(index=False).encode("utf-8"),
            file_name="analysis_results.csv",
            mime="text/csv",
        )
    except Exception as e:
        st.error(f"An error occurred during analysis: {str(e)}")
    finally:
        # Remove the transient progress widgets whether or not analysis succeeded.
        progress_bar.empty()
        status_text.empty()
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()