Spaces:

mfoud444
/

oop

Running

Mohammed Foud committed on Apr 14

Commit

d0aaf42

1 Parent(s): eee2ac7

first commit

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -4,6 +4,16 @@ FROM python:3.9-slim
 # Set working directory
 WORKDIR /app
 # Copy requirements first to leverage Docker cache
 COPY requirements.txt .
@@ -13,8 +23,12 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy the rest of the application
 COPY . .
-# Create directory for model
-RUN mkdir -p /app/final_model
 # Expose port 7860 for Gradio
 EXPOSE 7860

 # Set working directory
 WORKDIR /app
+# Create a non-root user
+RUN useradd -m -u 1000 user && \
+    chown -R user:user /app
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    TRANSFORMERS_CACHE=/app/cache \
+    MPLCONFIGDIR=/app/matplotlib \
+    HOME=/app
 # Copy requirements first to leverage Docker cache
 COPY requirements.txt .
 # Copy the rest of the application
 COPY . .
+# Create necessary directories and set permissions
+RUN mkdir -p /app/cache /app/matplotlib && \
+    chown -R user:user /app
+# Switch to non-root user
+USER user
 # Expose port 7860 for Gradio
 EXPOSE 7860

app.py CHANGED Viewed

@@ -16,7 +16,17 @@ from tabulate import tabulate
 model_path = "./final_model"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 # Load dataset
 def load_dataset():
@@ -37,10 +47,12 @@ def get_initial_summary():
     if df is None:
         return "Error: Could not load dataset.csv"
-    # Take a sample of reviews for initial summary
-    sample_reviews = df['reviews.text'].sample(n=min(100, len(df))).fillna('').tolist()
-    sample_text = '\n'.join(sample_reviews)
-    return generate_category_summary(sample_text)
 def predict_sentiment(text):
     # Preprocess text
@@ -113,8 +125,8 @@ def generate_category_summary(reviews_text):
     Based on {len(reviews)} reviews analyzed.
     """
-    # Generate concise summary using BART
-    if len(summary_text) > 100:
         try:
             generated_summary = summarizer(
                 summary_text,

 model_path = "./final_model"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
+# Initialize summarizer with a smaller distilled model, running on CPU
+try:
+    summarizer = pipeline(
+        "summarization",
+        model="sshleifer/distilbart-cnn-6-6",
+        device=-1  # Use CPU
+    )
+except Exception as e:
+    print(f"Error loading summarizer: {str(e)}")
+    summarizer = None
 # Load dataset
 def load_dataset():
     if df is None:
         return "Error: Could not load dataset.csv"
+    try:
+        sample_reviews = df['reviews.text'].sample(n=min(50, len(df))).fillna('').tolist()
+        sample_text = '\n'.join(sample_reviews)
+        return generate_category_summary(sample_text)
+    except Exception as e:
+        return f"Error generating initial summary: {str(e)}"
 def predict_sentiment(text):
     # Preprocess text
     Based on {len(reviews)} reviews analyzed.
     """
+    # Generate concise summary using BART if available
+    if summarizer and len(summary_text) > 100:
         try:
             generated_summary = summarizer(
                 summary_text,