Spaces:

1ETERNAL
/

Sentilyze-APP

Runtime error

App Files Files Community

Numan sheikh committed on May 29

Commit

7fb74eb

1 Parent(s): 43c3b0a

Upload Sentilyze app files to Hugging Face Space

Browse files

Files changed (9) hide show

.gitignore +10 -0
Dockerfile +23 -0
README.md +0 -0
backend/__init__.py +13 -0
backend/csv_processor.py +49 -0
backend/sarcasm_detector.py +90 -0
backend/sentiment_analyzer.py +40 -0
frontend/app.py +235 -0
requirements.txt +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,10 @@

# Virtual environments
.venv/
venv/
# Python bytecode and notebook/editor artefacts
__pycache__/
*.pyc
.ipynb_checkpoints/
.streamlit/
# If you generate processed CSVs, exclude them.
# The comment must sit on its own line: gitignore has no trailing comments,
# so text after a pattern becomes part of the pattern and nothing matches.
data/*.csv
# For downloaded models (Hugging Face transformers cache)
.cache/
.DS_Store

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

# Use an official Python runtime as a parent image
FROM python:3.10-slim-buster
# Set the working directory in the container
WORKDIR /app
# Copy only the requirements file first so the dependency layer below stays
# cached until requirements.txt itself changes
COPY requirements.txt .
# Install the packages listed in requirements.txt, then download the TextBlob
# corpora that backend/sentiment_analyzer.py needs at runtime
RUN pip install --no-cache-dir -r requirements.txt \
    && python -m textblob.download_corpora
# Copy the entire project directory into the container at /app
COPY . .
# frontend/app.py stages uploaded CSV files under ./data, but that directory is
# not shipped with the project (git does not track empty directories), so
# create it here and keep it writable in case the container runs as non-root
RUN mkdir -p /app/data && chmod 777 /app/data
# Expose the port that Streamlit runs on
EXPOSE 8501
# Run the Streamlit application, listening on all interfaces so the published
# port is reachable from outside the container
CMD ["streamlit", "run", "frontend/app.py", "--server.port=8501", "--server.address=0.0.0.0"]

README.md ADDED Viewed

File without changes

backend/__init__.py ADDED Viewed

	@@ -0,0 +1,13 @@

+# backend/__init__.py
+# Import functions from individual modules to make them accessible directly from the 'backend' package
+from .sentiment_analyzer import analyze_sentiment
+from .sarcasm_detector import detect_sarcasm_and_highlight
+from .csv_processor import process_csv_for_dashboard
+# You can also define __all__ to explicitly list what gets imported with `from backend import *`
+__all__ = [
+    "analyze_sentiment",
+    "detect_sarcasm_and_highlight",
+    "process_csv_for_dashboard"
+]

backend/csv_processor.py ADDED Viewed

	@@ -0,0 +1,49 @@

+    # backend/csv_processor.py
+import pandas as pd
def process_csv_for_dashboard(filepath: str) -> pd.DataFrame:
    """
    Load a CSV file into a pandas DataFrame for the dashboard.

    Failures are reported on stdout instead of being raised, so callers only
    need to check whether the result is empty.

    Args:
        filepath (str): The path to the CSV file.

    Returns:
        pd.DataFrame: The parsed CSV data, or an empty DataFrame when the file
        is missing or cannot be parsed.
    """
    # Start from the failure value; it is replaced only when parsing succeeds.
    loaded = pd.DataFrame()
    try:
        loaded = pd.read_csv(filepath)
        # Further cleaning (dropping NaNs, normalising column names) would go here.
    except FileNotFoundError:
        print(f"Error: CSV file not found at {filepath}")
    except Exception as exc:
        print(f"Error processing CSV file: {exc}")
    return loaded
# Example usage (for testing this module independently).
# Everything, including the cleanup, stays inside the __main__ guard so that
# importing this module never touches the filesystem.
if __name__ == "__main__":
    import os
    import tempfile

    print("\n--- Testing CSV Processing ---")
    # Reviews that contain commas are quoted; unquoted, those rows would have
    # more fields than the header and pandas would refuse to parse the file.
    dummy_csv_content = """Name,Age,City,Review
John Doe,30,New York,This movie was amazing!
Jane Smith,24,Los Angeles,"It was okay, nothing special."
Peter Jones,45,Chicago,"Absolutely dreadful, what a waste of time."
Alice Brown,22,Houston,I'm so glad I spent my money on this. (sarcastic)
"""
    # Use a unique temporary file so the demo cannot overwrite or delete a
    # real file that happens to be in the current directory.
    fd, dummy_path = tempfile.mkstemp(suffix=".csv")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(dummy_csv_content)
        df = process_csv_for_dashboard(dummy_path)
        print("Dummy CSV DataFrame:")
        print(df.head())
    finally:
        # Clean up the dummy file even if the demo fails part-way through.
        os.remove(dummy_path)

backend/sarcasm_detector.py ADDED Viewed

	@@ -0,0 +1,90 @@

+# backend/sarcasm_detector.py
+from transformers import pipeline
# Cache for the sarcasm/irony pipeline. It is filled lazily by the first
# successful call to _load_sarcasm_pipeline(), not at import time.
_sarcasm_pipeline = None


def _load_sarcasm_pipeline():
    """Return the cached sarcasm/irony pipeline, creating it on first use.

    Returns None when the pipeline cannot be created; the error is printed and
    the next call tries again.
    """
    global _sarcasm_pipeline
    if _sarcasm_pipeline is not None:
        return _sarcasm_pipeline
    try:
        # cardiffnlp/twitter-roberta-base-irony is used for general irony/sarcasm detection.
        _sarcasm_pipeline = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-irony")
        print("Sarcasm/Irony detection pipeline loaded successfully.")
    except Exception as load_error:
        print(f"Could not load sarcasm/irony detection pipeline. Ensure 'transformers' and 'torch' are installed. Error: {load_error}")
        _sarcasm_pipeline = None
    return _sarcasm_pipeline
def detect_sarcasm_and_highlight(sentence: str, *, highlight_threshold: float = 50.0) -> dict:
    """
    Estimate how sarcastic/ironic a sentence is and return a highlighted copy.

    NOTE: Highlighting specific parts of a sentence is a hard NLP problem, so
          highlighting here is sentence-level only: the whole sentence is
          wrapped in <mark> when the sarcasm percentage exceeds the threshold.

    Args:
        sentence (str): The input sentence to analyze.
        highlight_threshold (float): Sarcasm percentage above which the
            sentence is wrapped in <mark>. Defaults to 50.

    Returns:
        dict: Always contains
            "sarcasm_percent" (float, 0-100),
            "highlighted_sentence" (HTML-escaped text, optionally inside
                <mark>; the input is returned untouched if it is not a string),
            "predicted_sarcasm_label" ("sarcastic", "not sarcastic",
                "unknown label" or "unknown").
            A "note" key is added when no prediction could be made.
    """
    import html  # Local import so this function carries its own dependency.

    # Reject invalid input before touching the model, so bad calls stay cheap.
    if not isinstance(sentence, str):
        return {
            "sarcasm_percent": 0.0,
            "highlighted_sentence": sentence,
            "predicted_sarcasm_label": "unknown",
            "note": "Model not loaded or invalid input.",
        }

    # The result is rendered as HTML by the caller, so user text must not be
    # able to inject markup of its own.
    safe_sentence = html.escape(sentence, quote=False)

    # Nothing to classify in blank text; skip the model call entirely.
    if not sentence.strip():
        return {
            "sarcasm_percent": 0.0,
            "highlighted_sentence": safe_sentence,
            "predicted_sarcasm_label": "not sarcastic",
        }

    pipeline_instance = _load_sarcasm_pipeline()
    if not pipeline_instance:
        # Same keys as a normal result so callers can index them safely.
        return {
            "sarcasm_percent": 0.0,
            "highlighted_sentence": safe_sentence,
            "predicted_sarcasm_label": "unknown",
            "note": "Model not loaded or invalid input.",
        }

    # Truncate over-long input instead of letting the model raise on it.
    # NOTE(review): 512 tokens is the usual RoBERTa-base limit — confirm
    # against the model config.
    results = pipeline_instance(sentence, truncation=True, max_length=512)

    sarcasm_percent = 0.0
    predicted_label = "not sarcastic"  # Default label
    if results:
        top_prediction = results[0]
        raw_label = top_prediction['label']  # e.g., 'irony' or 'non_irony'
        score = top_prediction['score']
        if raw_label == 'irony':
            sarcasm_percent = round(score * 100, 2)
            predicted_label = "sarcastic"
        elif raw_label == 'non_irony':
            # The score is the confidence in NOT sarcastic, so invert it.
            sarcasm_percent = round((1 - score) * 100, 2)
            predicted_label = "not sarcastic"
        else:
            # Unexpected label: report it rather than guessing.
            predicted_label = "unknown label"

    highlighted_sentence = safe_sentence
    if sarcasm_percent > highlight_threshold:
        highlighted_sentence = f"<mark>{safe_sentence}</mark>"

    return {
        "sarcasm_percent": sarcasm_percent,
        "highlighted_sentence": highlighted_sentence,
        "predicted_sarcasm_label": predicted_label,
    }
# Example usage (for testing this module independently)
if __name__ == "__main__":
    print("--- Testing Sarcasm/Irony Detection (New Model) ---")
    demo_sentences = (
        # Expected to be detected as sarcastic
        "Wow, I just love waiting in traffic for two hours—it’s the highlight of my day!",
        "Oh, great, another Monday.",
        "I just love getting stuck in traffic for hours.",
        # Should be not sarcastic
        "The sun is shining brightly today.",
        # Clearly sarcastic
        "You're a genius! (said after someone made a foolish mistake)",
    )
    for demo_sentence in demo_sentences:
        print(f"'{demo_sentence}' -> {detect_sarcasm_and_highlight(demo_sentence)}")

backend/sentiment_analyzer.py ADDED Viewed

	@@ -0,0 +1,40 @@

+# backend/sentiment_analyzer.py
+from textblob import TextBlob
def analyze_sentiment(text: str) -> dict:
    """
    Classify the sentiment of a piece of text using its TextBlob polarity.

    Args:
        text (str): The input text to analyze.

    Returns:
        dict: {"class": ..., "polarity": ...}. "class" is "positive" when the
        polarity is above 0.05, "negative" when it is below -0.05 and
        "neutral" otherwise. Non-string input yields
        {"class": "invalid_input", "polarity": None}.
    """
    if not isinstance(text, str):
        return {"class": "invalid_input", "polarity": None}

    polarity = TextBlob(text).sentiment.polarity
    # Scores within +/-0.05 of zero are treated as neutral.
    sentiment_class = (
        "positive" if polarity > 0.05
        else "negative" if polarity < -0.05
        else "neutral"
    )
    return {"class": sentiment_class, "polarity": polarity}
# Example usage (for testing this module independently)
if __name__ == "__main__":
    print("--- Testing Sentiment Analysis ---")
    sample_texts = (
        "This is a wonderful product, I love it!",
        "I am so thrilled to have this broken piece of junk.",
        "The weather today is neither good nor bad.",
    )
    for sample in sample_texts:
        print(f"'{sample}' -> {analyze_sentiment(sample)}")

frontend/app.py ADDED Viewed

	@@ -0,0 +1,235 @@

+# frontend/app.py
+import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import sys
+import os
# Make the directory that contains the 'backend' package importable.
# os.path.dirname(__file__) is 'frontend/', so '..' is the project root.
_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
# Streamlit re-executes this script on every interaction while sys.path lives
# for the whole process, so add the entry only once.
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)
# Import functions directly from the 'backend' package;
# backend/__init__.py re-exports them from the individual modules.
from backend import analyze_sentiment, process_csv_for_dashboard, detect_sarcasm_and_highlight
# --- Streamlit App Configuration ---
# Kept as the first Streamlit call in the script, as in the original.
_PAGE_CONFIG = {
    "page_title": "Sentilyze - Sentiment & Sarcasm Analyzer",
    "page_icon": "✨",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)

# --- Custom CSS for better aesthetics ---
# Styles the header, buttons, text inputs, file uploader, alerts and the
# <mark> tag used for sarcasm highlighting.
_CUSTOM_CSS = """
    <style>
    .main-header {
        font-size: 3em;
        font-weight: bold;
        color: #4CAF50;
        text-align: center;
        margin-bottom: 30px;
        text-shadow: 2px 2px 4px #aaaaaa;
    }
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        border-radius: 12px;
        padding: 10px 24px;
        font-size: 18px;
        border: none;
        cursor: pointer;
        transition: all 0.3s ease;
        box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2);
    }
    .stButton>button:hover {
        background-color: #45a049;
        box-shadow: 0 6px 12px 0 rgba(0,0,0,0.3);
        transform: translateY(-2px);
    }
    .stTextInput>div>div>input {
        border-radius: 12px;
        border: 1px solid #ccc;
        padding: 10px;
        box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
    }
    .stFileUploader>div>div>button {
        background-color: #2196F3;
        color: white;
        border-radius: 12px;
        padding: 10px 24px;
        font-size: 18px;
        border: none;
        cursor: pointer;
        transition: all 0.3s ease;
        box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2);
    }
    .stFileUploader>div>div>button:hover {
        background-color: #0b7dda;
        box-shadow: 0 6px 12px 0 rgba(0,0,0,0.3);
        transform: translateY(-2px);
    }
    .stAlert {
        border-radius: 12px;
    }
    mark {
        background-color: #FFEB3B; /* Yellow highlight */
        padding: 2px 5px;
        border-radius: 3px;
    }
    </style>
    """
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# --- Header ---
_HEADER_HTML = "<h1 class='main-header'>Sentilyze ✨</h1>"
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
st.write("Analyze sentiment, detect sarcasm, and visualize insights from your text data.")
# --- Navigation (using Streamlit's sidebar for sections) ---
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", ["Single Text Analysis", "CSV File Analysis", "About"])

# One fixed colour per sentiment class, looked up by label so a pie slice keeps
# its colour no matter how value_counts() orders the classes.
_SENTIMENT_COLORS = {"positive": "#4CAF50", "neutral": "#FFC107", "negative": "#F44336"}
# Session-state key that remembers which (file, column) analysis was requested.
_ANALYSIS_STATE_KEY = "sentilyze_requested_analysis"


def _render_single_text_page():
    """Render the single-text page: a text area plus sentiment and sarcasm buttons."""
    st.header("Analyze Single Text")
    user_input = st.text_area("Enter text here:", "This product is absolutely fantastic!", height=150)
    has_text = bool(user_input and user_input.strip())
    col1, col2 = st.columns(2)
    with col1:
        if st.button("Analyze Sentiment"):
            if has_text:
                sentiment_result = analyze_sentiment(user_input)
                st.success(f"**Sentiment:** {sentiment_result['class'].capitalize()}")
                st.info(f"**Polarity Score:** {sentiment_result['polarity']:.2f} (closer to 1 is positive, -1 is negative)")
            else:
                st.warning("Please enter some text to analyze sentiment.")
    with col2:
        if st.button("Detect Sarcasm"):
            if has_text:
                sarcasm_result = detect_sarcasm_and_highlight(user_input)
                st.success(f"**Sarcasm Probability:** {sarcasm_result['sarcasm_percent']:.2f}%")
                # The label can be absent when the model could not be loaded,
                # so fall back instead of raising KeyError.
                predicted_label = sarcasm_result.get('predicted_sarcasm_label', 'unknown')
                st.info(f"**Predicted Sarcasm (Model's view):** {predicted_label.capitalize()}")
                st.markdown(f"**Highlighted Text:** {sarcasm_result['highlighted_sentence']}", unsafe_allow_html=True)
                if "note" in sarcasm_result:
                    st.caption(f"Note: {sarcasm_result['note']}")
            else:
                st.warning("Please enter some text to detect sarcasm.")


def _read_uploaded_csv(uploaded_file):
    """Stage the upload in a temporary file, parse it, and always delete the file."""
    import tempfile  # Local import: not part of this file's top-level imports.

    # A system temp file avoids depending on a pre-existing 'data' directory
    # and on the client-supplied file name.
    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as staged:
        staged.write(uploaded_file.getbuffer())
        temp_filepath = staged.name
    try:
        return process_csv_for_dashboard(temp_filepath)
    finally:
        # Clean up the temporary file even if parsing raised.
        if os.path.exists(temp_filepath):
            os.remove(temp_filepath)


def _render_sentiment_dashboard(df, selected_text_column):
    """Run sentiment analysis on one column and render the tables and charts."""
    with st.spinner("Analyzing sentiment... This might take a while for large files."):
        # Analyse each row once and derive both result columns from it.
        analyses = [analyze_sentiment(text) for text in df[selected_text_column].astype(str)]
        df['Sentiment'] = [analysis['class'] for analysis in analyses]
        df['Polarity'] = [analysis['polarity'] for analysis in analyses]
    result_columns = [selected_text_column, 'Sentiment', 'Polarity']

    st.subheader("Sentiment Analysis Results:")
    st.dataframe(df[result_columns].head())
    st.subheader("Sentiment Distribution:")
    sentiment_counts = df['Sentiment'].value_counts()
    st.bar_chart(sentiment_counts)

    # Interactive Dashboard Elements
    st.subheader("Interactive Dashboard")
    # Pie chart for sentiment distribution
    fig1, ax1 = plt.subplots()
    slice_colors = [_SENTIMENT_COLORS.get(label, "#9E9E9E") for label in sentiment_counts.index]
    sentiment_counts.plot.pie(autopct='%1.1f%%', startangle=90, ax=ax1, colors=slice_colors)
    ax1.set_ylabel('')  # Hide the default 'Sentiment' label
    ax1.set_title('Overall Sentiment Distribution')
    st.pyplot(fig1)
    plt.close(fig1)  # Release the figure; the script reruns on every interaction.

    # Histogram of Polarity Scores
    fig2, ax2 = plt.subplots()
    sns.histplot(df['Polarity'], bins=20, kde=True, ax=ax2, color='#2196F3')
    ax2.set_title('Distribution of Polarity Scores')
    ax2.set_xlabel('Polarity Score')
    ax2.set_ylabel('Frequency')
    st.pyplot(fig2)
    plt.close(fig2)

    # Display data by sentiment type
    st.subheader("View Data by Sentiment Type")
    sentiment_filter = st.selectbox(
        "Filter by Sentiment:",
        ["All", "positive", "neutral", "negative"]
    )
    if sentiment_filter == "All":
        st.dataframe(df[result_columns])
    else:
        st.dataframe(df.loc[df['Sentiment'] == sentiment_filter, result_columns])


def _render_csv_page():
    """Render the CSV page: upload, column selection and sentiment dashboard."""
    st.header("Analyze CSV File")
    st.write("Upload a CSV file containing text data for sentiment analysis and dashboard visualization.")
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        # No file: forget any earlier request so a new upload starts clean.
        st.session_state.pop(_ANALYSIS_STATE_KEY, None)
        return

    df = _read_uploaded_csv(uploaded_file)
    if df.empty:
        st.error("Could not process the CSV file. Please check its format.")
        return

    st.success("CSV file uploaded and processed successfully!")
    st.subheader("Raw Data Preview:")
    st.dataframe(df.head())

    # Offer text-like columns: classic object dtype, plus pandas' dedicated string dtype.
    text_columns = [
        col for col in df.columns
        if df[col].dtype == 'object' or isinstance(df[col].dtype, pd.StringDtype)
    ]
    if not text_columns:
        st.error("No text columns found in the CSV. Please ensure your CSV has columns with review text.")
        return

    selected_text_column = st.selectbox(
        "Select the column containing text/reviews for analysis:",
        text_columns
    )
    # st.button is True only on the run right after the click. Remember the
    # request in session state so the dashboard survives the reruns triggered
    # by its own widgets (e.g. the sentiment filter).
    requested = (uploaded_file.name, uploaded_file.size, selected_text_column)
    if st.button(f"Perform Sentiment Analysis on '{selected_text_column}'"):
        st.session_state[_ANALYSIS_STATE_KEY] = requested
    if st.session_state.get(_ANALYSIS_STATE_KEY) == requested:
        _render_sentiment_dashboard(df, selected_text_column)


def _render_about_page():
    """Render the static About page."""
    st.header("About Sentilyze")
    st.write("""
        Sentilyze is a web application designed to help you understand the sentiment and nuances
        of text data. It offers:
        - **Single Text Analysis:** Quickly determine the sentiment (positive, neutral, negative)
          and potential sarcasm of individual pieces of text.
        - **CSV File Analysis:** Upload your own datasets (e.g., customer reviews, social media comments)
          and get an interactive dashboard showing sentiment distribution and polarity.
        - **Sarcasm Detection:** A feature to estimate the sarcasm percentage in a sentence,
          with basic highlighting (note: advanced sarcasm highlighting is a complex NLP task).
        **Technologies Used:**
        - **Backend:** Python, `pandas`, `TextBlob`, `transformers` (Hugging Face)
        - **Frontend:** Streamlit
        - **Deployment:** Docker, GitHub, (potential platforms like Streamlit Community Cloud, Heroku, Render)
        **Developed by:** [Your Name/Team Name Here]
        """)
    st.markdown("[GitHub Repository (Coming Soon!)](#)", unsafe_allow_html=True)


# --- Page routing: the sidebar choice selects exactly one renderer ---
_PAGE_RENDERERS = {
    "Single Text Analysis": _render_single_text_page,
    "CSV File Analysis": _render_csv_page,
    "About": _render_about_page,
}
_PAGE_RENDERERS[page]()
# --- Footer ---
# Horizontal rule plus a centred copyright line, rendered as raw HTML.
_FOOTER_HTML = """
    <hr>
    <p style='text-align: center; color: grey;'>Sentilyze © 2023</p>
    """
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)

requirements.txt ADDED Viewed

Binary file (2.21 kB). View file