Numan sheikh committed on
Commit
7fb74eb
·
1 Parent(s): 43c3b0a

Upload Sentilyze app files to Hugging Face Space

Browse files
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ .ipynb_checkpoints/
5
+ .streamlit/
6
+ # If you generate processed CSVs, exclude them
+ data/*.csv
7
+ # For downloaded models (Hugging Face transformers cache)
8
+ .cache/
9
+ venv/
10
+ .DS_Store
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image.
# Debian "buster" is end-of-life, so build on the maintained bookworm variant.
FROM python:3.10-slim-bookworm

# Set the working directory in the container
WORKDIR /app

# Copy only the requirements file first so the dependency layer stays cached
# until requirements.txt itself changes
COPY requirements.txt .

# Install the packages listed in requirements.txt, then download the
# TextBlob corpora used by backend/sentiment_analyzer.py
RUN pip install --no-cache-dir -r requirements.txt \
    && python -m textblob.download_corpora

# Copy the entire project directory into the container at /app
COPY . .

# data/*.csv is git-ignored, so the data/ directory may be missing from the
# checkout; create it so code that writes files under data/ does not fail
RUN mkdir -p data

# Expose the port that Streamlit is started on below
EXPOSE 8501

# Run the Streamlit application; the flags bind it to all interfaces on 8501
# NOTE(review): confirm the hosting platform routes traffic to port 8501
CMD ["streamlit", "run", "frontend/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md ADDED
File without changes
backend/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/__init__.py
2
+
3
+ # Import functions from individual modules to make them accessible directly from the 'backend' package
4
+ from .sentiment_analyzer import analyze_sentiment
5
+ from .sarcasm_detector import detect_sarcasm_and_highlight
6
+ from .csv_processor import process_csv_for_dashboard
7
+
8
+ # You can also define __all__ to explicitly list what gets imported with `from backend import *`
9
+ __all__ = [
10
+ "analyze_sentiment",
11
+ "detect_sarcasm_and_highlight",
12
+ "process_csv_for_dashboard"
13
+ ]
backend/csv_processor.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/csv_processor.py
2
+
3
+ import pandas as pd
4
+
5
def process_csv_for_dashboard(filepath: str) -> pd.DataFrame:
    """
    Load a CSV file into a pandas DataFrame.

    Failures are reported on stdout instead of being raised, so callers
    only need to check whether the returned frame is empty.

    Args:
        filepath (str): Location of the CSV file to read.

    Returns:
        pd.DataFrame: The parsed CSV content, or an empty DataFrame when the
        file is missing or cannot be parsed.
    """
    try:
        frame = pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"Error: CSV file not found at {filepath}")
    except Exception as e:
        print(f"Error processing CSV file: {e}")
    else:
        # Extension point: column normalisation or NaN handling would go here.
        return frame
    return pd.DataFrame()
28
+
29
+ # Example Usage (for testing this module independently)
30
if __name__ == "__main__":
    # Manual smoke test: write a small CSV, load it, and print the result.
    import os
    import tempfile

    print("\n--- Testing CSV Processing ---")
    # Review texts that contain commas are quoted; unquoted they would add a
    # fifth field to a four-column file and make the parse fail.
    dummy_csv_content = (
        "Name,Age,City,Review\n"
        "John Doe,30,New York,This movie was amazing!\n"
        'Jane Smith,24,Los Angeles,"It was okay, nothing special."\n'
        'Peter Jones,45,Chicago,"Absolutely dreadful, what a waste of time."\n'
        "Alice Brown,22,Houston,I'm so glad I spent my money on this. (sarcastic)\n"
    )

    # A private temporary directory keeps the working directory clean and is
    # removed even if the code below raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        dummy_path = os.path.join(tmp_dir, "dummy_reviews.csv")
        with open(dummy_path, "w", encoding="utf-8") as f:
            f.write(dummy_csv_content)

        df = process_csv_for_dashboard(dummy_path)
        print("Dummy CSV DataFrame:")
        print(df.head())
backend/sarcasm_detector.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/sarcasm_detector.py
2
+
3
+ from transformers import pipeline
4
+
5
# Shared sarcasm/irony classifier. It is created lazily by
# _load_sarcasm_pipeline() on first use, not at import time.
_sarcasm_pipeline = None


def _load_sarcasm_pipeline():
    """Return the shared sarcasm/irony pipeline, creating it on first use.

    Returns:
        The text-classification pipeline, or ``None`` when it could not be
        created. A failed load is not remembered, so the next call tries again.
    """
    global _sarcasm_pipeline
    if _sarcasm_pipeline is None:
        try:
            # Using cardiffnlp/twitter-roberta-base-irony for general irony/sarcasm detection
            _sarcasm_pipeline = pipeline(
                "text-classification",
                model="cardiffnlp/twitter-roberta-base-irony",
            )
            print("Sarcasm/Irony detection pipeline loaded successfully.")
        except Exception as e:
            # Any load failure degrades to "no model" instead of crashing the caller.
            print(f"Could not load sarcasm/irony detection pipeline. Ensure 'transformers' and 'torch' are installed. Error: {e}")
            _sarcasm_pipeline = None
    return _sarcasm_pipeline


def detect_sarcasm_and_highlight(sentence: str) -> dict:
    """
    Estimate how sarcastic/ironic a sentence is and mark it up for display.

    Highlighting is sentence-level only: when the sarcasm percentage exceeds
    50, the whole sentence is wrapped in ``<mark>``. Token-level highlighting
    is not implemented.

    Args:
        sentence (str): The input sentence to analyze.

    Returns:
        dict: Always contains
            - "sarcasm_percent" (float): 0-100, rounded to two decimals.
            - "highlighted_sentence": an HTML fragment. The sentence text is
              HTML-escaped (``&``, ``<``, ``>``) so it can be rendered as HTML
              safely. A non-string input is returned unchanged.
            - "predicted_sarcasm_label" (str): "sarcastic", "not sarcastic",
              "unknown label", or "unavailable" when no prediction was made.
          and additionally "note" (str) when the input was not a string or
          the model could not be loaded.
    """
    import html

    fallback_note = "Model not loaded or invalid input."

    if not isinstance(sentence, str):
        # Nothing to classify; keep the same keys as the success path so
        # callers can index the result unconditionally.
        return {
            "sarcasm_percent": 0.0,
            "highlighted_sentence": sentence,
            "predicted_sarcasm_label": "unavailable",
            "note": fallback_note,
        }

    # The sentence is user-supplied text that ends up inside HTML markup.
    safe_sentence = html.escape(sentence, quote=False)

    pipeline_instance = _load_sarcasm_pipeline()
    if pipeline_instance is None:
        return {
            "sarcasm_percent": 0.0,
            "highlighted_sentence": safe_sentence,
            "predicted_sarcasm_label": "unavailable",
            "note": fallback_note,
        }

    results = pipeline_instance(sentence)
    sarcasm_percent = 0.0
    predicted_label = "not sarcastic"  # Default when the model returns nothing

    if results:
        # The code expects the labels 'irony' and 'non_irony' from this model.
        main_result = results[0]
        predicted_label_raw = main_result['label']
        score = main_result['score']

        if predicted_label_raw == 'irony':
            sarcasm_percent = round(score * 100, 2)
            predicted_label = "sarcastic"
        elif predicted_label_raw == 'non_irony':
            # The score is the confidence in "not ironic", so the sarcasm
            # percentage is its complement.
            sarcasm_percent = round((1 - score) * 100, 2)
            predicted_label = "not sarcastic"
        else:
            sarcasm_percent = 0.0
            predicted_label = "unknown label"  # Fallback for unexpected labels

    # Simple sentence-level highlighting above an arbitrary 50% threshold.
    highlighted_sentence = safe_sentence
    if sarcasm_percent > 50:
        highlighted_sentence = f"<mark>{safe_sentence}</mark>"

    return {
        "sarcasm_percent": sarcasm_percent,
        "highlighted_sentence": highlighted_sentence,
        "predicted_sarcasm_label": predicted_label,
    }
75
+
76
+ # Example Usage (for testing this module independently)
77
if __name__ == "__main__":
    # Manual smoke test: run a handful of sentences through the detector.
    print("--- Testing Sarcasm/Irony Detection (New Model) ---")
    sample_sentences = (
        # Ideally detected as sarcastic
        "Wow, I just love waiting in traffic for two hours—it’s the highlight of my day!",
        "Oh, great, another Monday.",
        "I just love getting stuck in traffic for hours.",
        # Should be not sarcastic
        "The sun is shining brightly today.",
        # Clearly sarcastic
        "You're a genius! (said after someone made a foolish mistake)",
    )

    for sample in sample_sentences:
        print(f"'{sample}' -> {detect_sarcasm_and_highlight(sample)}")
backend/sentiment_analyzer.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/sentiment_analyzer.py
2
+
3
+ from textblob import TextBlob
4
+
5
def analyze_sentiment(text: str) -> dict:
    """
    Classify the sentiment of a piece of text using TextBlob polarity.

    Args:
        text (str): The input text to analyze.

    Returns:
        dict: {"class": ..., "polarity": ...}. "class" is "positive" when the
        polarity is above 0.05, "negative" when it is below -0.05, and
        "neutral" otherwise. Non-string input yields
        {"class": "invalid_input", "polarity": None}.
    """
    if not isinstance(text, str):
        return {"class": "invalid_input", "polarity": None}

    score = TextBlob(text).sentiment.polarity

    # Scores within +/-0.05 of zero are treated as neutral.
    if score > 0.05:
        return {"class": "positive", "polarity": score}
    if score < -0.05:
        return {"class": "negative", "polarity": score}
    return {"class": "neutral", "polarity": score}
30
+
31
+ # Example Usage (for testing this module independently)
32
if __name__ == "__main__":
    # Manual smoke test: print the analysis for a few sample sentences.
    print("--- Testing Sentiment Analysis ---")
    sample_texts = (
        "This is a wonderful product, I love it!",
        "I am so thrilled to have this broken piece of junk.",
        "The weather today is neither good nor bad.",
    )

    for sample in sample_texts:
        print(f"'{sample}' -> {analyze_sentiment(sample)}")
frontend/app.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # frontend/app.py
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ import sys
8
+ import os
9
+
10
+ # Add the parent directory of 'backend' to the Python path
11
+ # This allows importing 'backend' as a package
12
+ # os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) points to 'sentilyze/'
13
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
14
+
15
+ # Import functions directly from the 'backend' package
16
+ # The __init__.py in backend handles the individual imports
17
+ from backend import analyze_sentiment, process_csv_for_dashboard, detect_sarcasm_and_highlight
18
+
19
# --- Page configuration ---
st.set_page_config(
    page_title="Sentilyze - Sentiment & Sarcasm Analyzer",
    page_icon="✨",
    layout="wide",
    initial_sidebar_state="expanded",
)

# --- Global stylesheet ---
# Styles the header, buttons, text inputs, file uploader, alerts and the
# <mark> element used for sarcasm highlighting.
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 3em;
        font-weight: bold;
        color: #4CAF50;
        text-align: center;
        margin-bottom: 30px;
        text-shadow: 2px 2px 4px #aaaaaa;
    }
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        border-radius: 12px;
        padding: 10px 24px;
        font-size: 18px;
        border: none;
        cursor: pointer;
        transition: all 0.3s ease;
        box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2);
    }
    .stButton>button:hover {
        background-color: #45a049;
        box-shadow: 0 6px 12px 0 rgba(0,0,0,0.3);
        transform: translateY(-2px);
    }
    .stTextInput>div>div>input {
        border-radius: 12px;
        border: 1px solid #ccc;
        padding: 10px;
        box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
    }
    .stFileUploader>div>div>button {
        background-color: #2196F3;
        color: white;
        border-radius: 12px;
        padding: 10px 24px;
        font-size: 18px;
        border: none;
        cursor: pointer;
        transition: all 0.3s ease;
        box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2);
    }
    .stFileUploader>div>div>button:hover {
        background-color: #0b7dda;
        box-shadow: 0 6px 12px 0 rgba(0,0,0,0.3);
        transform: translateY(-2px);
    }
    .stAlert {
        border-radius: 12px;
    }
    mark {
        background-color: #FFEB3B; /* Yellow highlight */
        padding: 2px 5px;
        border-radius: 3px;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)


# --- Title and tagline ---
st.markdown("<h1 class='main-header'>Sentilyze ✨</h1>", unsafe_allow_html=True)
st.write("Analyze sentiment, detect sarcasm, and visualize insights from your text data.")

# --- Sidebar navigation: the selected entry decides which section renders ---
st.sidebar.title("Navigation")
nav_pages = ["Single Text Analysis", "CSV File Analysis", "About"]
page = st.sidebar.radio("Go to", nav_pages)
95
+
96
# --- Single Text Analysis Section ---
# Two independent actions on the same text box: sentiment and sarcasm.
if page == "Single Text Analysis":
    st.header("Analyze Single Text")
    user_input = st.text_area("Enter text here:", "This product is absolutely fantastic!", height=150)
    # Whitespace-only input carries nothing to analyze.
    has_text = bool(user_input and user_input.strip())

    col1, col2 = st.columns(2)

    with col1:
        if st.button("Analyze Sentiment"):
            if has_text:
                sentiment_result = analyze_sentiment(user_input)
                st.success(f"**Sentiment:** {sentiment_result['class'].capitalize()}")
                st.info(f"**Polarity Score:** {sentiment_result['polarity']:.2f} (closer to 1 is positive, -1 is negative)")
            else:
                st.warning("Please enter some text to analyze sentiment.")

    with col2:
        if st.button("Detect Sarcasm"):
            if has_text:
                sarcasm_result = detect_sarcasm_and_highlight(user_input)
                st.success(f"**Sarcasm Probability:** {sarcasm_result['sarcasm_percent']:.2f}%")
                # The detector's fallback result (model not loaded) may omit
                # the label, so read it defensively instead of indexing.
                predicted_label = sarcasm_result.get('predicted_sarcasm_label', 'unavailable')
                st.info(f"**Predicted Sarcasm (Model's view):** {predicted_label.capitalize()}")
                st.markdown(f"**Highlighted Text:** {sarcasm_result['highlighted_sentence']}", unsafe_allow_html=True)
                if "note" in sarcasm_result:
                    st.caption(f"Note: {sarcasm_result['note']}")
            else:
                st.warning("Please enter some text to detect sarcasm.")
124
+
125
+ # --- CSV File Analysis Section ---
126
+ elif page == "CSV File Analysis":
127
+ st.header("Analyze CSV File")
128
+ st.write("Upload a CSV file containing text data for sentiment analysis and dashboard visualization.")
129
+
130
+ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
131
+
132
+ if uploaded_file is not None:
133
+ # Save the uploaded file temporarily to process it with pandas
134
+ # In a real app, consider more robust temporary file handling or direct BytesIO
135
+ temp_filepath = os.path.join("data", uploaded_file.name)
136
+ with open(temp_filepath, "wb") as f:
137
+ f.write(uploaded_file.getbuffer())
138
+
139
+ df = process_csv_for_dashboard(temp_filepath)
140
+
141
+ if not df.empty:
142
+ st.success("CSV file uploaded and processed successfully!")
143
+ st.subheader("Raw Data Preview:")
144
+ st.dataframe(df.head())
145
+
146
+ # Allow user to select the text column
147
+ text_columns = [col for col in df.columns if df[col].dtype == 'object'] # Assuming text is object/string type
148
+ if not text_columns:
149
+ st.error("No text columns found in the CSV. Please ensure your CSV has columns with review text.")
150
+ else:
151
+ selected_text_column = st.selectbox(
152
+ "Select the column containing text/reviews for analysis:",
153
+ text_columns
154
+ )
155
+
156
+ if st.button(f"Perform Sentiment Analysis on '{selected_text_column}'"):
157
+ with st.spinner("Analyzing sentiment... This might take a while for large files."):
158
+ # Apply sentiment analysis to the selected column
159
+ df['Sentiment'] = df[selected_text_column].astype(str).apply(lambda x: analyze_sentiment(x)['class'])
160
+ df['Polarity'] = df[selected_text_column].astype(str).apply(lambda x: analyze_sentiment(x)['polarity'])
161
+
162
+ st.subheader("Sentiment Analysis Results:")
163
+ st.dataframe(df[[selected_text_column, 'Sentiment', 'Polarity']].head())
164
+
165
+ st.subheader("Sentiment Distribution:")
166
+ sentiment_counts = df['Sentiment'].value_counts()
167
+ st.bar_chart(sentiment_counts)
168
+
169
+ # Interactive Dashboard Elements
170
+ st.subheader("Interactive Dashboard")
171
+
172
+ # Pie chart for sentiment distribution
173
+ fig1, ax1 = plt.subplots()
174
+ sentiment_counts.plot.pie(autopct='%1.1f%%', startangle=90, ax=ax1,
175
+ colors=['#4CAF50', '#FFC107', '#F44336']) # Positive, Neutral, Negative
176
+ ax1.set_ylabel('') # Hide the default 'Sentiment' label
177
+ ax1.set_title('Overall Sentiment Distribution')
178
+ st.pyplot(fig1)
179
+
180
+ # Histogram of Polarity Scores
181
+ fig2, ax2 = plt.subplots()
182
+ sns.histplot(df['Polarity'], bins=20, kde=True, ax=ax2, color='#2196F3')
183
+ ax2.set_title('Distribution of Polarity Scores')
184
+ ax2.set_xlabel('Polarity Score')
185
+ ax2.set_ylabel('Frequency')
186
+ st.pyplot(fig2)
187
+
188
+ # Display data by sentiment type
189
+ st.subheader("View Data by Sentiment Type")
190
+ sentiment_filter = st.selectbox(
191
+ "Filter by Sentiment:",
192
+ ["All", "positive", "neutral", "negative"]
193
+ )
194
+ if sentiment_filter == "All":
195
+ st.dataframe(df[[selected_text_column, 'Sentiment', 'Polarity']])
196
+ else:
197
+ filtered_df = df[df['Sentiment'] == sentiment_filter]
198
+ st.dataframe(filtered_df[[selected_text_column, 'Sentiment', 'Polarity']])
199
+
200
+ else:
201
+ st.error("Could not process the CSV file. Please check its format.")
202
+
203
+ # Clean up the temporary file
204
+ if os.path.exists(temp_filepath):
205
+ os.remove(temp_filepath)
206
+
207
+
208
+ # --- About Section ---
209
+ elif page == "About":
210
+ st.header("About Sentilyze")
211
+ st.write("""
212
+ Sentilyze is a web application designed to help you understand the sentiment and nuances
213
+ of text data. It offers:
214
+
215
+ - **Single Text Analysis:** Quickly determine the sentiment (positive, neutral, negative)
216
+ and potential sarcasm of individual pieces of text.
217
+ - **CSV File Analysis:** Upload your own datasets (e.g., customer reviews, social media comments)
218
+ and get an interactive dashboard showing sentiment distribution and polarity.
219
+ - **Sarcasm Detection:** A feature to estimate the sarcasm percentage in a sentence,
220
+ with basic highlighting (note: advanced sarcasm highlighting is a complex NLP task).
221
+
222
+ **Technologies Used:**
223
+ - **Backend:** Python, `pandas`, `TextBlob`, `transformers` (Hugging Face)
224
+ - **Frontend:** Streamlit
225
+ - **Deployment:** Docker, GitHub, (potential platforms like Streamlit Community Cloud, Heroku, Render)
226
+
227
+ **Developed by:** [Your Name/Team Name Here]
228
+ """)
229
+ st.markdown("[GitHub Repository (Coming Soon!)](#)", unsafe_allow_html=True)
230
+
231
+ # --- Footer ---
232
+ st.markdown("""
233
+ <hr>
234
+ <p style='text-align: center; color: grey;'>Sentilyze © 2023</p>
235
+ """, unsafe_allow_html=True)
requirements.txt ADDED
Binary file (2.21 kB). View file