Spaces:
Runtime error
Runtime error
Numan sheikh
commited on
Commit
·
7fb74eb
1
Parent(s):
43c3b0a
Upload Sentilyze app files to Hugging Face Space
Browse files- .gitignore +10 -0
- Dockerfile +23 -0
- README.md +0 -0
- backend/__init__.py +13 -0
- backend/csv_processor.py +49 -0
- backend/sarcasm_detector.py +90 -0
- backend/sentiment_analyzer.py +40 -0
- frontend/app.py +235 -0
- requirements.txt +0 -0
.gitignore
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.venv/
|
2 |
+
__pycache__/
|
3 |
+
*.pyc
|
4 |
+
.ipynb_checkpoints/
|
5 |
+
.streamlit/
|
6 |
+
# If you generate processed CSVs, exclude them
data/*.csv
|
7 |
+
# For downloaded models (Hugging Face transformers cache)
|
8 |
+
.cache/
|
9 |
+
venv/
|
10 |
+
.DS_Store
|
Dockerfile
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use an official Python runtime as a parent image
|
2 |
+
FROM python:3.10-slim-buster
|
3 |
+
|
4 |
+
# Set the working directory in the container
|
5 |
+
WORKDIR /app
|
6 |
+
|
7 |
+
# Copy the requirements file into the container at /app
|
8 |
+
COPY requirements.txt .
|
9 |
+
|
10 |
+
# Install any needed packages specified in requirements.txt
|
11 |
+
# Also, download textblob corpora which is required by sentiment_analyzer.py
|
12 |
+
RUN pip install --no-cache-dir -r requirements.txt \
|
13 |
+
&& python -m textblob.download_corpora
|
14 |
+
|
15 |
+
# Copy the entire project directory into the container at /app
|
16 |
+
COPY . .
|
17 |
+
|
18 |
+
# Expose the port that Streamlit runs on
|
19 |
+
EXPOSE 8501
|
20 |
+
|
21 |
+
# Define the command to run the Streamlit application
|
22 |
+
# Bind Streamlit to 0.0.0.0 explicitly so the app is reachable from outside the container
|
23 |
+
CMD ["streamlit", "run", "frontend/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
README.md
ADDED
File without changes
|
backend/__init__.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# backend/__init__.py
|
2 |
+
|
3 |
+
# Import functions from individual modules to make them accessible directly from the 'backend' package
|
4 |
+
from .sentiment_analyzer import analyze_sentiment
|
5 |
+
from .sarcasm_detector import detect_sarcasm_and_highlight
|
6 |
+
from .csv_processor import process_csv_for_dashboard
|
7 |
+
|
8 |
+
# You can also define __all__ to explicitly list what gets imported with `from backend import *`
|
9 |
+
__all__ = [
|
10 |
+
"analyze_sentiment",
|
11 |
+
"detect_sarcasm_and_highlight",
|
12 |
+
"process_csv_for_dashboard"
|
13 |
+
]
|
backend/csv_processor.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# backend/csv_processor.py
|
2 |
+
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
def process_csv_for_dashboard(filepath: str) -> pd.DataFrame:
    """
    Load a CSV file into a pandas DataFrame for the dashboard.

    Read failures never propagate to the caller: the problem is reported on
    stdout and an empty DataFrame is handed back instead.

    Args:
        filepath (str): Location of the CSV file to load.

    Returns:
        pd.DataFrame: The parsed CSV contents, or an empty DataFrame when the
                      file is missing or cannot be read.
    """
    try:
        return pd.read_csv(filepath)
    except FileNotFoundError:
        # Missing file gets its own, more specific message.
        problem = f"Error: CSV file not found at {filepath}"
    except Exception as exc:
        problem = f"Error processing CSV file: {exc}"

    print(problem)
    return pd.DataFrame()
|
28 |
+
|
29 |
+
# Example Usage (for testing this module independently)
|
30 |
+
if __name__ == "__main__":
    # Manual smoke test: write a small CSV, load it back, then clean up.
    import csv
    import os

    print("\n--- Testing CSV Processing ---")

    demo_path = "dummy_reviews.csv"
    # Several reviews contain commas. Writing through csv.writer quotes those
    # fields, so every row keeps exactly four columns and the file parses.
    demo_rows = [
        ["Name", "Age", "City", "Review"],
        ["John Doe", 30, "New York", "This movie was amazing!"],
        ["Jane Smith", 24, "Los Angeles", "It was okay, nothing special."],
        ["Peter Jones", 45, "Chicago", "Absolutely dreadful, what a waste of time."],
        ["Alice Brown", 22, "Houston", "I'm so glad I spent my money on this. (sarcastic)"],
    ]

    try:
        with open(demo_path, "w", newline="", encoding="utf-8") as f:
            csv.writer(f).writerows(demo_rows)

        df = process_csv_for_dashboard(demo_path)
        print("Dummy CSV DataFrame:")
        print(df.head())
    finally:
        # Remove the dummy file even if loading or printing failed.
        if os.path.exists(demo_path):
            os.remove(demo_path)
|
backend/sarcasm_detector.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# backend/sarcasm_detector.py
|
2 |
+
|
3 |
+
from transformers import pipeline
|
4 |
+
|
5 |
+
# Initialize the sarcasm/irony detection pipeline once when the module is loaded
|
6 |
+
_sarcasm_pipeline = None
|
7 |
+
|
8 |
+
def _load_sarcasm_pipeline():
|
9 |
+
"""Loads the sarcasm/irony detection pipeline if not already loaded."""
|
10 |
+
global _sarcasm_pipeline
|
11 |
+
if _sarcasm_pipeline is None:
|
12 |
+
try:
|
13 |
+
# Using cardiffnlp/twitter-roberta-base-irony for general irony/sarcasm detection
|
14 |
+
_sarcasm_pipeline = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-irony")
|
15 |
+
print("Sarcasm/Irony detection pipeline loaded successfully.")
|
16 |
+
except Exception as e:
|
17 |
+
print(f"Could not load sarcasm/irony detection pipeline. Ensure 'transformers' and 'torch' are installed. Error: {e}")
|
18 |
+
_sarcasm_pipeline = None
|
19 |
+
return _sarcasm_pipeline
|
20 |
+
|
21 |
+
def detect_sarcasm_and_highlight(sentence: str) -> dict:
    """
    Estimate how sarcastic/ironic a sentence is and return a highlighted copy.

    NOTE: Highlighting the specific sarcastic part of a sentence would need
    token-level analysis from a specialised model. This implementation only
    reports a sentence-level percentage and, above a threshold, wraps the
    whole sentence in ``<mark>`` tags.

    Args:
        sentence (str): The input sentence to analyze.

    Returns:
        dict: Always contains
            - "sarcasm_percent" (float): 0-100 estimate of sarcasm.
            - "highlighted_sentence": HTML-escaped sentence, wrapped in
              ``<mark>`` when sarcasm_percent > 50. For non-string input the
              value is returned unchanged.
            - "predicted_sarcasm_label" (str): "sarcastic", "not sarcastic",
              "unknown label" (unexpected model label) or "unknown" (no
              prediction was made).
            When no prediction could be made (invalid input or model not
            loaded) a "note" key explains why.
    """
    import html  # local import: only this function needs it

    def _fallback(display_value):
        # Same keys as the normal result so callers can index
        # "predicted_sarcasm_label" without a KeyError.
        return {
            "sarcasm_percent": 0.0,
            "highlighted_sentence": display_value,
            "predicted_sarcasm_label": "unknown",
            "note": "Model not loaded or invalid input.",
        }

    # Validate first so invalid input never triggers a model load.
    if not isinstance(sentence, str):
        return _fallback(sentence)

    # The frontend renders this value with unsafe_allow_html=True, so user
    # text must be escaped before any markup is added around it.
    safe_sentence = html.escape(sentence, quote=False)

    pipeline_instance = _load_sarcasm_pipeline()
    if not pipeline_instance:
        return _fallback(safe_sentence)

    # Truncate long inputs; the RoBERTa model accepts at most 512 tokens and
    # the pipeline raises on longer sequences otherwise.
    results = pipeline_instance(sentence, truncation=True, max_length=512)
    sarcasm_percent = 0.0
    predicted_label = "not sarcastic"  # Default label

    if results:
        # This model (cardiffnlp/twitter-roberta-base-irony) returns 'irony'
        # for ironic/sarcastic text and 'non_irony' otherwise.
        main_result = results[0]
        predicted_label_raw = main_result['label']
        score = main_result['score']

        if predicted_label_raw == 'irony':
            sarcasm_percent = round(score * 100, 2)
            predicted_label = "sarcastic"
        elif predicted_label_raw == 'non_irony':
            # The score is confidence in NOT sarcastic, so invert it.
            sarcasm_percent = round((1 - score) * 100, 2)
            predicted_label = "not sarcastic"
        else:
            # Fallback for unexpected labels (shouldn't happen with this model)
            sarcasm_percent = 0.0
            predicted_label = "unknown label"

    # Simple highlighting: mark the whole sentence above an arbitrary threshold.
    highlighted_sentence = safe_sentence
    if sarcasm_percent > 50:
        highlighted_sentence = f"<mark>{safe_sentence}</mark>"

    return {
        "sarcasm_percent": sarcasm_percent,
        "highlighted_sentence": highlighted_sentence,
        "predicted_sarcasm_label": predicted_label,
    }
|
75 |
+
|
76 |
+
# Example Usage (for testing this module independently)
|
77 |
+
if __name__ == "__main__":
    # Manual smoke test: run the detector over a few sample sentences.
    print("--- Testing Sarcasm/Irony Detection (New Model) ---")

    samples = (
        # Expected to be flagged as sarcastic
        "Wow, I just love waiting in traffic for two hours—it’s the highlight of my day!",
        "Oh, great, another Monday.",
        "I just love getting stuck in traffic for hours.",
        # Plain statement; should not be sarcastic
        "The sun is shining brightly today.",
        # Clearly sarcastic given the parenthetical context
        "You're a genius! (said after someone made a foolish mistake)",
    )

    for sample in samples:
        print(f"'{sample}' -> {detect_sarcasm_and_highlight(sample)}")
|
backend/sentiment_analyzer.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# backend/sentiment_analyzer.py
|
2 |
+
|
3 |
+
from textblob import TextBlob
|
4 |
+
|
5 |
+
def analyze_sentiment(text: str) -> dict:
    """
    Classify the sentiment of a piece of text using TextBlob polarity.

    Args:
        text (str): The input text to analyze.

    Returns:
        dict: ``{"class": ..., "polarity": ...}`` where class is "positive"
              (polarity > 0.05), "negative" (polarity < -0.05) or "neutral"
              otherwise. Non-string input yields class "invalid_input" with a
              polarity of None.
    """
    if not isinstance(text, str):
        return {"class": "invalid_input", "polarity": None}

    score = TextBlob(text).sentiment.polarity

    # A small dead zone around zero is treated as neutral.
    label = (
        "positive" if score > 0.05
        else "negative" if score < -0.05
        else "neutral"
    )

    return {"class": label, "polarity": score}
|
30 |
+
|
31 |
+
# Example Usage (for testing this module independently)
|
32 |
+
if __name__ == "__main__":
    # Manual smoke test: print the analysis for a few sample sentences.
    print("--- Testing Sentiment Analysis ---")

    examples = (
        "This is a wonderful product, I love it!",
        "I am so thrilled to have this broken piece of junk.",
        "The weather today is neither good nor bad.",
    )

    for example in examples:
        print(f"'{example}' -> {analyze_sentiment(example)}")
|
frontend/app.py
ADDED
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# frontend/app.py
|
2 |
+
|
3 |
+
"""Streamlit front end for Sentilyze.

Provides three pages selected from the sidebar: single-text sentiment and
sarcasm analysis, CSV upload with a sentiment dashboard, and an About page.
"""

import os
import sys
import tempfile

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

# Add the project root (the parent of this file's directory) to the import
# path so that 'backend' can be imported as a package.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# The __init__.py in backend re-exports these functions.
from backend import analyze_sentiment, process_csv_for_dashboard, detect_sarcasm_and_highlight

# --- Streamlit App Configuration ---
st.set_page_config(
    page_title="Sentilyze - Sentiment & Sarcasm Analyzer",
    page_icon="✨",
    layout="wide",
    initial_sidebar_state="expanded"
)

# --- Custom CSS for better aesthetics ---
st.markdown("""
<style>
.main-header {
    font-size: 3em;
    font-weight: bold;
    color: #4CAF50;
    text-align: center;
    margin-bottom: 30px;
    text-shadow: 2px 2px 4px #aaaaaa;
}
.stButton>button {
    background-color: #4CAF50;
    color: white;
    border-radius: 12px;
    padding: 10px 24px;
    font-size: 18px;
    border: none;
    cursor: pointer;
    transition: all 0.3s ease;
    box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2);
}
.stButton>button:hover {
    background-color: #45a049;
    box-shadow: 0 6px 12px 0 rgba(0,0,0,0.3);
    transform: translateY(-2px);
}
.stTextInput>div>div>input {
    border-radius: 12px;
    border: 1px solid #ccc;
    padding: 10px;
    box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
}
.stFileUploader>div>div>button {
    background-color: #2196F3;
    color: white;
    border-radius: 12px;
    padding: 10px 24px;
    font-size: 18px;
    border: none;
    cursor: pointer;
    transition: all 0.3s ease;
    box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2);
}
.stFileUploader>div>div>button:hover {
    background-color: #0b7dda;
    box-shadow: 0 6px 12px 0 rgba(0,0,0,0.3);
    transform: translateY(-2px);
}
.stAlert {
    border-radius: 12px;
}
mark {
    background-color: #FFEB3B; /* Yellow highlight */
    padding: 2px 5px;
    border-radius: 3px;
}
</style>
""", unsafe_allow_html=True)


# --- Header ---
st.markdown("<h1 class='main-header'>Sentilyze ✨</h1>", unsafe_allow_html=True)
st.write("Analyze sentiment, detect sarcasm, and visualize insights from your text data.")

# --- Navigation (using Streamlit's sidebar for sections) ---
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", ["Single Text Analysis", "CSV File Analysis", "About"])

# --- Single Text Analysis Section ---
if page == "Single Text Analysis":
    st.header("Analyze Single Text")
    user_input = st.text_area("Enter text here:", "This product is absolutely fantastic!", height=150)
    # Whitespace-only input is treated the same as empty input.
    has_text = bool(user_input and user_input.strip())

    col1, col2 = st.columns(2)

    with col1:
        if st.button("Analyze Sentiment"):
            if has_text:
                sentiment_result = analyze_sentiment(user_input)
                st.success(f"**Sentiment:** {sentiment_result['class'].capitalize()}")
                st.info(f"**Polarity Score:** {sentiment_result['polarity']:.2f} (closer to 1 is positive, -1 is negative)")
            else:
                st.warning("Please enter some text to analyze sentiment.")

    with col2:
        if st.button("Detect Sarcasm"):
            if has_text:
                sarcasm_result = detect_sarcasm_and_highlight(user_input)
                st.success(f"**Sarcasm Probability:** {sarcasm_result['sarcasm_percent']:.2f}%")
                # The backend's fallback result (model not loaded / invalid
                # input) may omit the label, so read it defensively.
                sarcasm_label = sarcasm_result.get('predicted_sarcasm_label', 'unknown')
                st.info(f"**Predicted Sarcasm (Model's view):** {sarcasm_label.capitalize()}")
                st.markdown(f"**Highlighted Text:** {sarcasm_result['highlighted_sentence']}", unsafe_allow_html=True)
                if "note" in sarcasm_result:
                    st.caption(f"Note: {sarcasm_result['note']}")
            else:
                st.warning("Please enter some text to detect sarcasm.")

# --- CSV File Analysis Section ---
elif page == "CSV File Analysis":
    st.header("Analyze CSV File")
    st.write("Upload a CSV file containing text data for sentiment analysis and dashboard visualization.")

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    if uploaded_file is not None:
        # Spool the upload to a private temp file because the backend reads
        # from a path. A generated name in the system temp directory avoids
        # relying on a pre-existing 'data/' folder, on a user-supplied file
        # name, and on collisions between concurrent sessions.
        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
            tmp.write(uploaded_file.getbuffer())
            temp_filepath = tmp.name
        try:
            df = process_csv_for_dashboard(temp_filepath)
        finally:
            # The DataFrame is fully in memory; always remove the temp file.
            if os.path.exists(temp_filepath):
                os.remove(temp_filepath)

        if df.empty:
            st.error("Could not process the CSV file. Please check its format.")
        else:
            st.success("CSV file uploaded and processed successfully!")
            st.subheader("Raw Data Preview:")
            st.dataframe(df.head())

            # Offer only columns that hold text (object or string dtype).
            text_columns = [
                col for col in df.columns
                if pd.api.types.is_object_dtype(df[col]) or pd.api.types.is_string_dtype(df[col])
            ]
            if not text_columns:
                st.error("No text columns found in the CSV. Please ensure your CSV has columns with review text.")
            else:
                selected_text_column = st.selectbox(
                    "Select the column containing text/reviews for analysis:",
                    text_columns
                )

                # Identifies which upload/column a stored analysis belongs to.
                analysis_key = (uploaded_file.name, uploaded_file.size, selected_text_column)

                if st.button(f"Perform Sentiment Analysis on '{selected_text_column}'"):
                    with st.spinner("Analyzing sentiment... This might take a while for large files."):
                        # Analyze each row once and derive both columns from it.
                        row_results = df[selected_text_column].astype(str).map(analyze_sentiment)
                        analyzed_df = df.copy()
                        analyzed_df['Sentiment'] = row_results.map(lambda r: r['class'])
                        analyzed_df['Polarity'] = row_results.map(lambda r: r['polarity'])
                    # A button is only True on the run triggered by the click.
                    # Keep the result in session state so the dashboard
                    # survives reruns caused by the filter widget below.
                    st.session_state['csv_analysis'] = (analysis_key, analyzed_df)

                stored = st.session_state.get('csv_analysis')
                if stored is not None and stored[0] == analysis_key:
                    analyzed_df = stored[1]
                    result_columns = [selected_text_column, 'Sentiment', 'Polarity']

                    st.subheader("Sentiment Analysis Results:")
                    st.dataframe(analyzed_df[result_columns].head())

                    st.subheader("Sentiment Distribution:")
                    sentiment_counts = analyzed_df['Sentiment'].value_counts()
                    st.bar_chart(sentiment_counts)

                    # Interactive Dashboard Elements
                    st.subheader("Interactive Dashboard")

                    # Pie chart. value_counts() orders by frequency, so pick
                    # each slice's colour by its label, not by position.
                    color_by_label = {
                        "positive": "#4CAF50",
                        "neutral": "#FFC107",
                        "negative": "#F44336",
                    }
                    fig1, ax1 = plt.subplots()
                    sentiment_counts.plot.pie(
                        autopct='%1.1f%%', startangle=90, ax=ax1,
                        colors=[color_by_label.get(label, "#9E9E9E") for label in sentiment_counts.index]
                    )
                    ax1.set_ylabel('')  # Hide the default series-name label
                    ax1.set_title('Overall Sentiment Distribution')
                    st.pyplot(fig1)
                    plt.close(fig1)  # Figures otherwise accumulate across reruns

                    # Histogram of Polarity Scores
                    fig2, ax2 = plt.subplots()
                    sns.histplot(analyzed_df['Polarity'], bins=20, kde=True, ax=ax2, color='#2196F3')
                    ax2.set_title('Distribution of Polarity Scores')
                    ax2.set_xlabel('Polarity Score')
                    ax2.set_ylabel('Frequency')
                    st.pyplot(fig2)
                    plt.close(fig2)

                    # Display data by sentiment type
                    st.subheader("View Data by Sentiment Type")
                    sentiment_filter = st.selectbox(
                        "Filter by Sentiment:",
                        ["All", "positive", "neutral", "negative"]
                    )
                    if sentiment_filter == "All":
                        st.dataframe(analyzed_df[result_columns])
                    else:
                        filtered_df = analyzed_df[analyzed_df['Sentiment'] == sentiment_filter]
                        st.dataframe(filtered_df[result_columns])

# --- About Section ---
elif page == "About":
    st.header("About Sentilyze")
    st.write("""
    Sentilyze is a web application designed to help you understand the sentiment and nuances
    of text data. It offers:

    - **Single Text Analysis:** Quickly determine the sentiment (positive, neutral, negative)
      and potential sarcasm of individual pieces of text.
    - **CSV File Analysis:** Upload your own datasets (e.g., customer reviews, social media comments)
      and get an interactive dashboard showing sentiment distribution and polarity.
    - **Sarcasm Detection:** A feature to estimate the sarcasm percentage in a sentence,
      with basic highlighting (note: advanced sarcasm highlighting is a complex NLP task).

    **Technologies Used:**
    - **Backend:** Python, `pandas`, `TextBlob`, `transformers` (Hugging Face)
    - **Frontend:** Streamlit
    - **Deployment:** Docker, GitHub, (potential platforms like Streamlit Community Cloud, Heroku, Render)

    **Developed by:** [Your Name/Team Name Here]
    """)
    st.markdown("[GitHub Repository (Coming Soon!)](#)", unsafe_allow_html=True)

# --- Footer ---
st.markdown("""
<hr>
<p style='text-align: center; color: grey;'>Sentilyze © 2023</p>
""", unsafe_allow_html=True)
|
requirements.txt
ADDED
Binary file (2.21 kB). View file
|
|