# Streamlit app source captured from a Hugging Face Space (Space status: Sleeping).
# File size: 7,475 Bytes
import html
import os
import tempfile

import pandas as pd
import streamlit as st
from transformers import pipeline
def calculate_star_rating(positive_percent):
    """Convert a positive-review percentage into a 1-5 star rating.

    Args:
        positive_percent: Share of positive reviews, expressed in percent.

    Returns:
        5 for >= 80, 4 for >= 60, 3 for >= 40, 2 for >= 20, otherwise 1.
    """
    # (inclusive lower bound, stars), checked from the highest bound down.
    thresholds = ((80, 5), (60, 4), (40, 3), (20, 2))
    for lower_bound, stars in thresholds:
        if positive_percent >= lower_bound:
            return stars
    # Anything below the lowest bound gets the minimum rating.
    return 1
# Page-level CSS injected into the app via st.markdown.
_PAGE_CSS = """
<style>
.reportview-container {
background: #f0f2f6;
}
.stProgress > div > div > div > div {
background-color: #4CAF50;
}
</style>
"""

# Usage instructions shown under the page title.
_INSTRUCTIONS_MD = """
### Instructions:
1. Upload a CSV file containing movie reviews (must include a 'comment' column)
2. The system will automatically analyze the sentiment of each review
3. Generate overall ratings, keyphrase extraction, and summary reports
"""

# HTML card for one keyphrase; filled in with str.format(word=..., score=...).
_KEYPHRASE_CARD_HTML = """
<div style="
border: 1px solid #ddd;
border-radius: 5px;
padding: 10px;
text-align: center;
margin: 5px;
background-color: #add8e6;
">
<b>{word}</b><br>
<small>Confidence: {score:.2f}</small>
</div>
"""

# Character cap applied to the text handed to the summarizer.
_SUMMARY_INPUT_CHARS = 1024


def _load_models():
    """Create the sentiment, keyphrase and summarization pipelines.

    Returns:
        A ``(classifier, keyphrase_extractor, summarizer)`` tuple.
    """
    classifier = pipeline(
        "text-classification",
        model="KeonBlackwell/movie_sentiment_model",
        tokenizer="distilbert-base-uncased",
    )
    keyphrase_extractor = pipeline(
        "token-classification",
        model="ml6team/keyphrase-extraction-distilbert-inspec",
        aggregation_strategy="simple",
    )
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return classifier, keyphrase_extractor, summarizer


def _classify_comments(classifier, comments, progress_bar, status_text):
    """Classify every comment and return the results as a DataFrame.

    The returned frame has the columns ``comment``, ``sentiment`` (1 when the
    predicted label is ``LABEL_1``, else 0) and ``confidence``.
    """
    total = len(comments)
    rows = []
    for position, comment in enumerate(comments, start=1):
        progress_bar.progress(position / total)
        status_text.text(f"Analyzing sentiment for {position}/{total} reviews...")
        # truncation=True keeps over-long reviews within the model's input
        # limit instead of raising inside the pipeline.
        prediction = classifier(comment, truncation=True)[0]
        rows.append({
            'comment': comment,
            'sentiment': 1 if prediction['label'] == 'LABEL_1' else 0,
            'confidence': prediction['score'],
        })
    return pd.DataFrame(rows)


def _render_keyphrases(keyphrase_extractor, text):
    """Extract keyphrases from *text* and show the five highest-scoring ones."""
    with st.spinner("Extracting keyphrases..."):
        keyphrases = keyphrase_extractor(text)
    top_keyphrases = sorted(keyphrases, key=lambda x: x['score'], reverse=True)[:5]

    st.markdown("**π Extracted Keyphrases:**")
    for column, phrase in zip(st.columns(5), top_keyphrases):
        # The phrase text originates from the uploaded CSV, so escape it
        # before embedding it in markup rendered with unsafe_allow_html.
        card = _KEYPHRASE_CARD_HTML.format(
            word=html.escape(str(phrase['word'])),
            score=phrase['score'],
        )
        column.markdown(card, unsafe_allow_html=True)


def _render_summary(summarizer, text):
    """Summarize the leading part of *text* and display the summary."""
    with st.spinner("Generating review summary..."):
        # The character cap alone does not bound the token count (multi-byte
        # text can expand into several tokens per character), so also ask the
        # pipeline to truncate at the tokenizer level.
        summary = summarizer(
            text[:_SUMMARY_INPUT_CHARS],
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
    st.markdown("**π Review Summary:**")
    st.info(summary[0]['summary_text'])


def main():
    """Run the Streamlit movie-review batch analysis app.

    Loads the three pipelines, lets the user upload a CSV with a ``comment``
    column, and on "Start Analysis" classifies each review, shows the overall
    rating, keyphrases and a summary, and offers the results as a CSV download.
    Errors are reported in the page via ``st.error`` rather than raised.
    """
    # NOTE(review): the icon prefixes in the UI strings below (e.g. "π¬")
    # look like mis-decoded emoji; they are kept exactly as found - confirm
    # the intended glyphs.
    st.set_page_config(page_title="Movie Review Analysis System", page_icon="π¬")
    st.markdown(_PAGE_CSS, unsafe_allow_html=True)

    # Model loading. Streamlit re-executes the script on every interaction;
    # st.cache_resource keeps the pipelines alive between reruns so they are
    # only built once per server process.
    with st.spinner("Loading all models, this may take a few minutes..."):
        try:
            load_models = st.cache_resource(show_spinner=False)(_load_models)
            classifier, keyphrase_extractor, summarizer = load_models()
        except Exception as e:
            st.error(f"Model loading failed: {str(e)}")
            return

    # Page layout
    st.title("π¬ Movie Review Batch Analysis System")
    st.markdown(_INSTRUCTIONS_MD)

    # File upload
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # Read data
    try:
        df = pd.read_csv(uploaded_file)
        if 'comment' not in df.columns:
            st.error("The CSV file must contain a 'comment' column")
            return
        # Missing cells come back as NaN floats, which neither the pipelines
        # nor str.join accept: drop them, coerce the rest to str and skip
        # blank reviews.
        comments = [
            text
            for text in df['comment'].dropna().astype(str)
            if text.strip()
        ]
    except Exception as e:
        st.error(f"File reading failed: {str(e)}")
        return

    if not comments:
        st.error("The 'comment' column does not contain any reviews")
        return

    # Show preview
    with st.expander("Preview of Original Data (First 5 Rows)"):
        st.dataframe(df.head())

    if not st.button("Start Analysis"):
        return

    progress_bar = st.progress(0)
    status_text = st.empty()
    try:
        # Sentiment analysis
        result_df = _classify_comments(
            classifier, comments, progress_bar, status_text
        )

        # Calculate statistics (comments is non-empty, so no division by zero)
        positive_count = int(result_df['sentiment'].sum())
        total_reviews = len(result_df)
        positive_percent = (positive_count / total_reviews) * 100
        star_rating = calculate_star_rating(positive_percent)

        st.success("Sentiment analysis completed!")

        # Rating display
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("β Overall Rating", f"{star_rating} Stars")
        with col2:
            st.metric("π Positive Reviews", f"{positive_count}/{total_reviews}")
        with col3:
            st.metric("π Positive Ratio", f"{positive_percent:.1f}%")

        # Progress bar visualization of the positive ratio
        st.progress(positive_percent / 100)

        with st.expander("View Detailed Analysis Results (First 10 Rows)"):
            st.dataframe(result_df.head(10))

        # Keyphrase extraction and summary over all reviews joined together
        st.subheader("π Keyphrase Extraction and Summary of Reviews")
        combined_text = " ".join(comments)
        _render_keyphrases(keyphrase_extractor, combined_text)
        _render_summary(summarizer, combined_text)

        # Serve the CSV from memory: no temporary file to write while it is
        # still open, and nothing left on disk if a later step fails.
        st.download_button(
            label="Download Full Results",
            data=result_df.to_csv(index=False).encode("utf-8"),
            file_name="analysis_results.csv",
            mime="text/csv",
        )
    except Exception as e:
        st.error(f"An error occurred during analysis: {str(e)}")
    finally:
        # Always clear the transient progress widgets, even on failure.
        progress_bar.empty()
        status_text.empty()
# Script entry point: start the app only when this file is run directly.
if __name__ == "__main__":
    main()