Mohammed Foud committed on
Commit
d0aaf42
·
1 Parent(s): eee2ac7

first commit

Browse files
Files changed (2) hide show
  1. Dockerfile +16 -2
  2. app.py +19 -7
Dockerfile CHANGED
@@ -4,6 +4,16 @@ FROM python:3.9-slim
4
  # Set working directory
5
  WORKDIR /app
6
 
 
 
 
 
 
 
 
 
 
 
7
  # Copy requirements first to leverage Docker cache
8
  COPY requirements.txt .
9
 
@@ -13,8 +23,12 @@ RUN pip install --no-cache-dir -r requirements.txt
13
  # Copy the rest of the application
14
  COPY . .
15
 
16
- # Create directory for model
17
- RUN mkdir -p /app/final_model
 
 
 
 
18
 
19
  # Expose port 7860 for Gradio
20
  EXPOSE 7860
 
4
  # Set working directory
5
  WORKDIR /app
6
 
7
+ # Create a non-root user
8
+ RUN useradd -m -u 1000 user && \
9
+ chown -R user:user /app
10
+
11
+ # Set environment variables
12
+ ENV PYTHONUNBUFFERED=1 \
13
+ TRANSFORMERS_CACHE=/app/cache \
14
+ MPLCONFIGDIR=/app/matplotlib \
15
+ HOME=/app
16
+
17
  # Copy requirements first to leverage Docker cache
18
  COPY requirements.txt .
19
 
 
23
  # Copy the rest of the application
24
  COPY . .
25
 
26
+ # Create necessary directories and set permissions
27
+ RUN mkdir -p /app/cache /app/matplotlib && \
28
+ chown -R user:user /app
29
+
30
+ # Switch to non-root user
31
+ USER user
32
 
33
  # Expose port 7860 for Gradio
34
  EXPOSE 7860
app.py CHANGED
@@ -16,7 +16,17 @@ from tabulate import tabulate
16
  model_path = "./final_model"
17
  tokenizer = AutoTokenizer.from_pretrained(model_path)
18
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
19
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
 
 
 
 
 
 
 
 
20
 
21
  # Load dataset
22
  def load_dataset():
@@ -37,10 +47,12 @@ def get_initial_summary():
37
  if df is None:
38
  return "Error: Could not load dataset.csv"
39
 
40
- # Take a sample of reviews for initial summary
41
- sample_reviews = df['reviews.text'].sample(n=min(100, len(df))).fillna('').tolist()
42
- sample_text = '\n'.join(sample_reviews)
43
- return generate_category_summary(sample_text)
 
 
44
 
45
  def predict_sentiment(text):
46
  # Preprocess text
@@ -113,8 +125,8 @@ def generate_category_summary(reviews_text):
113
  Based on {len(reviews)} reviews analyzed.
114
  """
115
 
116
- # Generate concise summary using BART
117
- if len(summary_text) > 100:
118
  try:
119
  generated_summary = summarizer(
120
  summary_text,
 
16
  model_path = "./final_model"
17
  tokenizer = AutoTokenizer.from_pretrained(model_path)
18
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
19
+
20
+ # Initialize summarizer with a smaller distilled BART model on CPU
21
+ try:
22
+ summarizer = pipeline(
23
+ "summarization",
24
+ model="sshleifer/distilbart-cnn-6-6",
25
+ device=-1 # Use CPU
26
+ )
27
+ except Exception as e:
28
+ print(f"Error loading summarizer: {str(e)}")
29
+ summarizer = None
30
 
31
  # Load dataset
32
  def load_dataset():
 
47
  if df is None:
48
  return "Error: Could not load dataset.csv"
49
 
50
+ try:
51
+ sample_reviews = df['reviews.text'].sample(n=min(50, len(df))).fillna('').tolist()
52
+ sample_text = '\n'.join(sample_reviews)
53
+ return generate_category_summary(sample_text)
54
+ except Exception as e:
55
+ return f"Error generating initial summary: {str(e)}"
56
 
57
  def predict_sentiment(text):
58
  # Preprocess text
 
125
  Based on {len(reviews)} reviews analyzed.
126
  """
127
 
128
+ # Generate concise summary using BART if available
129
+ if summarizer and len(summary_text) > 100:
130
  try:
131
  generated_summary = summarizer(
132
  summary_text,