# Page residue captured with this file ("Spaces: Sleeping"); not part of the program.
import html
import os
import tempfile

import pandas as pd
import streamlit as st
from transformers import pipeline
def calculate_star_rating(positive_percent):
    """Map a positive-review percentage onto a 1-5 star score.

    Each 20-point band earns one more star: anything below 20 is 1 star,
    80 and above is 5 stars.
    """
    # (lower bound, stars) pairs, checked from the highest band downward.
    bands = ((80, 5), (60, 4), (40, 3), (20, 2))
    for lower_bound, stars in bands:
        if positive_percent >= lower_bound:
            return stars
    return 1
# Number of top-scoring keyphrases shown (one per column).
_TOP_KEYPHRASE_COUNT = 5
# Character cap applied to the text handed to the summarizer. This is a cut in
# characters, not tokens; it is used as a cheap way to keep the input short.
_SUMMARY_INPUT_CHARS = 1024


@st.cache_resource(show_spinner=False)
def _load_models():
    """Build the sentiment, keyphrase and summarization pipelines.

    Cached as a Streamlit resource so the models are constructed once and
    reused across script reruns instead of being reloaded on every widget
    interaction.

    Returns:
        A ``(classifier, keyphrase_extractor, summarizer)`` tuple.
    """
    classifier = pipeline(
        "text-classification",
        model="KeonBlackwell/movie_sentiment_model",
        tokenizer="distilbert-base-uncased",
    )
    keyphrase_extractor = pipeline(
        "token-classification",
        model="ml6team/keyphrase-extraction-distilbert-inspec",
        aggregation_strategy="simple",
    )
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return classifier, keyphrase_extractor, summarizer


def _clean_comments(df):
    """Return the usable values of the 'comment' column as a list of strings.

    Missing cells are dropped and remaining values are cast to ``str`` so that
    later string joins and model calls never receive NaN or numbers; blank
    strings are skipped as well.
    """
    return [text for text in df["comment"].dropna().astype(str) if text.strip()]


def _classify_comments(classifier, comments, progress_bar, status_text):
    """Classify each comment and return the per-review results as a DataFrame.

    The progress bar and status placeholder are updated once per review.
    """
    total = len(comments)
    results = []
    for position, comment in enumerate(comments, start=1):
        progress_bar.progress(position / total)
        status_text.text(f"Analyzing sentiment for {position}/{total} reviews...")
        # truncation=True: reviews longer than the model's input window are
        # cut by the tokenizer instead of raising inside the model.
        prediction = classifier(comment, truncation=True)[0]
        results.append({
            'comment': comment,
            'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
            'confidence': prediction['score'],
        })
    return pd.DataFrame(results)


def _render_sentiment_overview(result_df):
    """Show the star rating, positive counts/ratio and a sample of the results."""
    positive_count = int(result_df['sentiment'].sum())
    total_reviews = len(result_df)
    positive_percent = (positive_count / total_reviews) * 100
    star_rating = calculate_star_rating(positive_percent)

    st.success("Sentiment analysis completed!")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("β Overall Rating", f"{star_rating} Stars")
    with col2:
        st.metric("π Positive Reviews", f"{positive_count}/{total_reviews}")
    with col3:
        st.metric("π Positive Ratio", f"{positive_percent:.1f}%")
    # Second progress bar doubles as a visual gauge of the positive ratio.
    st.progress(positive_percent / 100)
    with st.expander("View Detailed Analysis Results (First 10 Rows)"):
        st.dataframe(result_df.head(10))


def _render_keyphrases(keyphrase_extractor, text):
    """Extract keyphrases from ``text`` and show the highest-scoring ones as cards."""
    with st.spinner("Extracting keyphrases..."):
        keyphrases = keyphrase_extractor(text)
        top_keyphrases = sorted(
            keyphrases, key=lambda item: item['score'], reverse=True
        )[:_TOP_KEYPHRASE_COUNT]

    st.markdown("**π Extracted Keyphrases:**")
    columns = st.columns(_TOP_KEYPHRASE_COUNT)
    for column, phrase in zip(columns, top_keyphrases):
        # The phrase comes from the uploaded file and is rendered with
        # unsafe_allow_html, so it must be escaped before interpolation.
        word = html.escape(str(phrase['word']))
        column.markdown(f"""
            <div style="
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 10px;
            text-align: center;
            margin: 5px;
            background-color: #add8e6;
            ">
            <b>{word}</b><br>
            <small>Confidence: {phrase['score']:.2f}</small>
            </div>
            """, unsafe_allow_html=True)


def _render_summary(summarizer, text):
    """Summarize the leading part of ``text`` and show the result."""
    with st.spinner("Generating review summary..."):
        # Only the first _SUMMARY_INPUT_CHARS characters are summarized.
        summary = summarizer(
            text[:_SUMMARY_INPUT_CHARS],
            max_length=130,
            min_length=30,
            do_sample=False,
        )
    st.markdown("**π Review Summary:**")
    st.info(summary[0]['summary_text'])


def main():
    """Run the Streamlit page for batch movie-review analysis.

    Flow: configure the page, load the models, accept a CSV upload with a
    'comment' column, and on "Start Analysis" classify every review, show the
    aggregate rating, keyphrases and a summary, and offer the per-review
    results as a CSV download. Errors are reported in the page via
    ``st.error`` rather than raised.
    """
    st.set_page_config(page_title="Movie Review Analysis System", page_icon="π¬")
    # Custom styles
    st.markdown("""
        <style>
        .reportview-container {
        background: #f0f2f6;
        }
        .stProgress > div > div > div > div {
        background-color: #4CAF50;
        }
        </style>
        """, unsafe_allow_html=True)

    # Model loading (served from the resource cache after the first run)
    with st.spinner("Loading all models, this may take a few minutes..."):
        try:
            classifier, keyphrase_extractor, summarizer = _load_models()
        except Exception as e:
            st.error(f"Model loading failed: {str(e)}")
            return

    # Page layout
    st.title("π¬ Movie Review Batch Analysis System")
    st.markdown("""
        ### Instructions:
        1. Upload a CSV file containing movie reviews (must include a 'comment' column)
        2. The system will automatically analyze the sentiment of each review
        3. Generate overall ratings, keyphrase extraction, and summary reports
        """)

    # File upload
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # Read data
    try:
        df = pd.read_csv(uploaded_file)
        if 'comment' not in df.columns:
            st.error("The CSV file must contain a 'comment' column")
            return
        comments = _clean_comments(df)
    except Exception as e:
        st.error(f"File reading failed: {str(e)}")
        return

    # Show preview
    with st.expander("Preview of Original Data (First 5 Rows)"):
        st.dataframe(df.head())

    # Without this guard an empty file would fail later with an opaque error
    # when the statistics are computed over zero rows.
    if not comments:
        st.error("The 'comment' column contains no reviews to analyze")
        return

    if not st.button("Start Analysis"):
        return

    progress_bar = st.progress(0)
    status_text = st.empty()
    try:
        result_df = _classify_comments(
            classifier, comments, progress_bar, status_text
        )
        _render_sentiment_overview(result_df)

        # Keyphrase extraction and summary work on all reviews joined together.
        st.subheader("π Keyphrase Extraction and Summary of Reviews")
        combined_text = " ".join(comments)
        _render_keyphrases(keyphrase_extractor, combined_text)
        _render_summary(summarizer, combined_text)

        # Serve the CSV from memory: no temporary file to create, re-open or
        # clean up if something fails part-way.
        # NOTE(review): clicking the download button appears to rerun the
        # script with "Start Analysis" reset, which would clear the results
        # above - confirm, and persist them in st.session_state if needed.
        st.download_button(
            label="Download Full Results",
            data=result_df.to_csv(index=False).encode("utf-8"),
            file_name="analysis_results.csv",
            mime="text/csv",
        )
    except Exception as e:
        st.error(f"An error occurred during analysis: {str(e)}")
    finally:
        progress_bar.empty()
        status_text.empty()
# Script entry point: launch the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()