azaher1215 committed
Commit · 82b68ff
1 Parent(s): 2660b8c

adding my files AZ
Files changed:
- .DS_Store  +0 -0
- Delete_Later_report.txt  +1 -0
- README.md  +55 -16
- assets/.DS_Store  +0 -0
- config.py  +8 -5
- model/recipe_search.py  +0 -139
- {scripts/NLP → model}/search_script.py  +66 -35
- pages/3_Recipe_Recommendation.py  +2 -2
- scripts/.DS_Store  +0 -0
- scripts/NLP/.DS_Store  +0 -0
.DS_Store ADDED
Binary file (8.2 kB).
Delete_Later_report.txt ADDED
@@ -0,0 +1 @@
+Report section:
README.md CHANGED
@@ -1,19 +1,58 @@
----
-title: Pattern
-emoji: π
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-- streamlit
-pinned: false
-short_description: for pattern recg
----
-
-
-
-
+# CSE 555 Term Project (Computer Vision and Natural Language Processing)
+
+## Overview
+This project is a multi-featured application focused on food image classification, variation detection, recipe recommendation, and reporting. It leverages deep learning and NLP techniques to provide a comprehensive toolkit for food-related data analysis and user interaction.
+
+## Features
+- **Image Classification:** Classify food images using pre-trained models.
+- **Variation Detection:** Detect variations in food items.
+- **Recipe Recommendation:** Recommend recipes based on user input and image analysis.
+- **Report Generation:** Generate reports from classification and recommendation results.
+
+## Project Structure
+```
+PatternRec_Project_Group5/
+├── assets/
+│   ├── css/              # Stylesheets
+│   ├── modelWeights/     # Pre-trained model weights (.pth)
+│   └── nlp/              # NLP data and models (downloaded from Google Drive on first run)
+├── config.py             # Configuration file
+├── Scripts/              # Training scripts
+│   ├── CV/               # CV training script
+│   └── NLP/              # NLP training script
+├── Home.py               # Main entry point (Streamlit)
+├── model/                # Model code (classifier, recipe search)
+├── pages/                # App pages (image classification, variation detection, etc.)
+├── utils/                # Utility functions (layout, etc.)
+└── sakenv/               # Python virtual environment
+```
+
+## Setup Instructions
+1. **Clone the repository:**
+   ```bash
+   git clone <repo-url>
+   cd PatternRec_Project_Group5
+   ```
+2. **Activate the virtual environment (already included as `sakenv/`):**
+   ```bash
+   source sakenv/bin/activate
+   ```
+3. **Install dependencies:**
+   ```bash
+   pip install -r requirements.txt
+   ```
+4. **Run the application:**
+   - If using Streamlit:
+     ```bash
+     streamlit run Home.py
+     ```
+   - Or follow the instructions in `Home.py`.
+
+## Python Version
+- Python 3.12.2
+
+## Notes
+- Model weights are stored in the `assets/` directory.
+- The NLP weights are large and are hosted on Google Drive: [CSE 555 Project Group 5](https://drive.google.com/drive/folders/1m6cfy4NuxIKNDBtJqm150NNN0FSUS8Np)
+- Ensure you have the necessary permissions to access the large files in `assets/modelWeights/` and `assets/nlp/`.
+- For best results, use the provided virtual environment and requirements file.
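Editor's note: the README's claim that `assets/nlp/` is populated from Google Drive on first run maps onto the `GOOGLE_DRIVE_FILES` dict added in `config.py` below. A minimal bootstrap sketch, assuming that dict's local-path to share-link layout; the `fetch_assets` helper itself is hypothetical and not in this commit:

```python
# Hypothetical bootstrap sketch: download any Google Drive asset that is
# missing locally before the app starts. GOOGLE_DRIVE_FILES (see config.py
# in this commit) maps local paths to shareable Drive links; gdown's
# fuzzy=True accepts the .../view?usp=drive_link URL form directly.
import os
import gdown
from config import GOOGLE_DRIVE_FILES

def fetch_assets() -> None:
    for local_path, drive_url in GOOGLE_DRIVE_FILES.items():
        if os.path.exists(local_path):
            continue  # already downloaded on a previous run
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        gdown.download(drive_url, local_path, quiet=False, fuzzy=True)

if __name__ == "__main__":
    fetch_assets()
```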
assets/.DS_Store CHANGED
Binary files a/assets/.DS_Store and b/assets/.DS_Store differ
config.py CHANGED
@@ -7,9 +7,12 @@ MODEL_PATH_TOMATO = "assets/modelWeights/best_model_tomato_v1.pth"
 MODEL_PATH_STRAWBERRY = "assets/modelWeights/best_model_strawberry_v1.pth"
 
 GOOGLE_DRIVE_FILES = {
-    'assets/nlp/
-    'assets/nlp/
-    'assets/nlp/
-    'assets/nlp/recipe_statistics_231630.pkl': '
-    'assets/nlp/
+    'assets/nlp/tag_based_bert_model.pth': 'https://drive.google.com/file/d/1CK6es61w4vIPZoMYk_x8lk6zS8v339h7/view?usp=drive_link',
+    'assets/nlp/RAW_recipes.csv': 'https://drive.google.com/file/d/1n69aIEzP1wO-2_NcnKy0KFQfkanaVNV8/view?usp=drive_link',
+    'assets/nlp/RAW_interactions.csv': 'https://drive.google.com/file/d/11WnvKR5P_Bl66Cy2CTKMrIUXvL497q7d/view?usp=drive_link',
+    'assets/nlp/recipe_statistics_231630.pkl': 'https://drive.google.com/file/d/1nTtpaiaY52wKZs8KWehUM70_O0rmPBmO/view?usp=drive_link',
+    'assets/nlp/advanced_recipe_embeddings_231630.npy': 'https://drive.google.com/file/d/1aCzBIdKcyB94qHjz14PnxmbxvD49DBSs/view?usp=drive_link',
+    'assets/nlp/advanced_filtered_recipes_231630.pkl': 'https://drive.google.com/file/d/1SwEmVjoVDrWD43CYynRT99EFRJepzapb/view?usp=drive_link',
+    'assets/nlp/pair_data.parquet': 'https://drive.google.com/file/d/1bs2s6xBFFQHFiVvZDGHfH91H5H-Np9Fa/view?usp=drive_link'
 }
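Editor's note: the old entries (truncated above) apparently stored bare file IDs that the deleted `model/recipe_search.py` (below) turned into `uc?id=` URLs, while the new entries store full share links and rely on `gdown`'s fuzzy parsing. A small sketch of the two call styles, using the `tag_based_bert_model.pth` ID from this hunk and an arbitrary output filename:

```python
import gdown

file_id = "1CK6es61w4vIPZoMYk_x8lk6zS8v339h7"  # tag_based_bert_model.pth, from the hunk above
share_link = f"https://drive.google.com/file/d/{file_id}/view?usp=drive_link"

# Old style (deleted recipe_search.py): build an explicit uc?id= download URL.
gdown.download(f"https://drive.google.com/uc?id={file_id}", "model.pth", quiet=False)

# New style (search_script.py): hand gdown the share link and let fuzzy=True
# extract the file ID itself.
gdown.download(share_link, "model.pth", quiet=False, fuzzy=True)
```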
model/recipe_search.py DELETED
@@ -1,139 +0,0 @@
-import os
-import csv
-import ast
-import pickle
-import gdown
-import torch
-import torch.nn.functional as F
-import streamlit as st
-from transformers import BertTokenizer, BertModel
-from config import GOOGLE_DRIVE_FILES
-
-
-def download_file_from_drive(file_id: str, destination: str, file_name: str) -> bool:
-    try:
-        with st.spinner(f"Downloading {file_name}..."):
-            url = f"https://drive.google.com/uc?id={file_id}"
-            gdown.download(url, destination, quiet=False)
-        return True
-    except Exception as e:
-        st.error(f"Failed to download {file_name}: {e}")
-        return False
-
-def ensure_files_downloaded():
-    for filename, file_id in GOOGLE_DRIVE_FILES.items():
-        if not os.path.exists(filename):
-            success = download_file_from_drive(file_id, filename, filename)
-            if not success:
-                return False
-    return True
-
-class GoogleDriveRecipeSearch:
-    def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-        if not ensure_files_downloaded():
-            self.is_ready = False
-            return
-
-        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-        self.model = BertModel.from_pretrained("bert-base-uncased")
-
-        if os.path.exists("assets/nlp/tag_based_bert_model.pth"):
-            self.model.load_state_dict(
-                torch.load("assets/nlp/tag_based_bert_model.pth", map_location=self.device)
-            )
-            st.success("Trained model loaded successfully!")
-        else:
-            st.warning("Using untrained model")
-
-        self.model.to(self.device)
-        self.model.eval()
-
-        self.load_data()
-        self.is_ready = True
-
-    def load_data(self):
-        self.recipe_embeddings = torch.load("assets/nlp/torch_recipe_embeddings_231630.pt", map_location=self.device)
-        self.recipes = self._load_recipes("assets/nlp/RAW_recipes.csv")
-        self.recipe_stats = pickle.load(open("assets/nlp/recipe_statistics_231630.pkl", "rb"))
-        self.recipe_scores = pickle.load(open("assets/nlp/recipe_scores_231630.pkl", "rb"))
-
-    def _load_recipes(self, path):
-        recipes = []
-        with open(path, "r", encoding="utf-8") as file:
-            reader = csv.DictReader(file)
-            for idx, row in enumerate(reader):
-                name = row.get("name", "").strip()
-                if not name or name.lower() in ["nan", "unknown recipe"]:
-                    continue
-                try:
-                    recipe = {
-                        "id": int(row.get("id", idx)),
-                        "name": name,
-                        "ingredients": ast.literal_eval(row.get("ingredients", "[]")),
-                        "tags": ast.literal_eval(row.get("tags", "[]")),
-                        "minutes": int(float(row.get("minutes", 0))),
-                        "n_steps": int(float(row.get("n_steps", 0))),
-                        "description": row.get("description", ""),
-                        "steps": ast.literal_eval(row.get("steps", "[]"))
-                    }
-                    recipes.append(recipe)
-                except:
-                    continue
-        return recipes
-
-    def search_recipes(self, query, num_results=5, min_rating=3.0):
-        if not query.strip():
-            return []
-        print('im here')
-
-        tokens = self.tokenizer(query, return_tensors="pt", truncation=True, padding=True)
-        tokens = {k: v.to(self.device) for k, v in tokens.items()}
-
-        with torch.no_grad():
-            outputs = self.model(**tokens)
-            query_embedding = outputs.last_hidden_state[:, 0, :]
-
-        query_embedding = F.normalize(query_embedding, dim=1)
-        recipe_embeddings = F.normalize(self.recipe_embeddings, dim=1)
-
-        similarity_scores = torch.matmul(recipe_embeddings, query_embedding.T).squeeze()
-
-        final_scores = []
-        for i in range(len(self.recipe_embeddings)):
-            recipe = self.recipes[i]
-            avg_rating, num_ratings, *_ = self.recipe_stats.get(recipe["id"], (0.0, 0, 0))
-            if avg_rating < min_rating or num_ratings < 2:
-                continue
-            combined_score = (
-                0.6 * similarity_scores[i].item() +
-                0.4 * self.recipe_scores.get(recipe["id"], 0)
-            )
-            final_scores.append((combined_score, i))
-
-        top_matches = sorted(final_scores, key=lambda x: x[0], reverse=True)[:num_results]
-
-        results = []
-        for score, idx in top_matches:
-            recipe = self.recipes[idx]
-            avg_rating, num_ratings, *_ = self.recipe_stats.get(recipe["id"], (0.0, 0, 0))
-            results.append({
-                "name": recipe["name"],
-                "tags": recipe.get("tags", []),
-                "ingredients": recipe.get("ingredients", []),
-                "minutes": recipe.get("minutes", 0),
-                "n_steps": recipe.get("n_steps", 0),
-                "avg_rating": avg_rating,
-                "num_ratings": num_ratings,
-                "similarity_score": similarity_scores[idx].item(),
-                "combined_score": score,
-                "steps": recipe.get("steps", []),
-                "description": recipe.get("description", "")
-            })
-
-        return results
-
-@st.cache_resource
-def load_search_system():
-    return GoogleDriveRecipeSearch()
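Editor's note: before the rename below, this deleted file carried the core ranking math: cosine similarity computed as a matmul of L2-normalized embeddings, blended 60/40 with a precomputed popularity score. A self-contained toy sketch of that computation, with random tensors standing in for the real BERT embeddings and `recipe_scores`:

```python
import torch
import torch.nn.functional as F

# Toy stand-ins: one 768-dim query embedding, five recipe embeddings.
query = F.normalize(torch.randn(1, 768), dim=1)
recipes = F.normalize(torch.randn(5, 768), dim=1)

# After L2 normalization, the matmul yields cosine similarity per recipe.
similarity = (recipes @ query.T).squeeze()

# Blend with a popularity prior, using the 0.6/0.4 weights from the deleted code.
popularity = torch.rand(5)
combined = 0.6 * similarity + 0.4 * popularity

# Top-3 recipe indices by blended score.
print(torch.topk(combined, k=3).indices)
```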
{scripts/NLP → model}/search_script.py RENAMED
@@ -3,33 +3,73 @@ import numpy as np
 from transformers import BertTokenizer, BertModel
 import pickle
 import json
+import gdown
+import os
+import sys
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+from config import GOOGLE_DRIVE_FILES
+
+
 class RecipeSearchSystem:
 
-    def __init__(self,
-
-
-
-        self.
-
-        self.
+    def __init__(self, max_recipes=231630):
+        try:
+            # Load all the preprocessed files
+            self.max_recipes = max_recipes
+            file_paths = {
+                'recipe_embeddings': GOOGLE_DRIVE_FILES['assets/nlp/advanced_recipe_embeddings_231630.npy'],
+                'recipes_df': GOOGLE_DRIVE_FILES['assets/nlp/advanced_filtered_recipes_231630.pkl'],
+                'recipe_stats': GOOGLE_DRIVE_FILES['assets/nlp/recipe_statistics_231630.pkl'],
+                'model': GOOGLE_DRIVE_FILES['assets/nlp/tag_based_bert_model.pth']
+            }
+            output_path = "assets/nlp/"
+            # download files from google drive
+            self.ensure_files_exist(file_paths, output_path)
+            # Set up device
+            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+            # Load tokenizer
+            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+
+            # Load the trained model
+            self.model = BertModel.from_pretrained('bert-base-uncased')
+            self.model.load_state_dict(torch.load(f'{output_path}tag_based_bert_model.pth', map_location=self.device))
+
+            self.model.to(self.device)
+            self.model.eval()
+
+            # load recipe embeddings
+            self.recipe_embeddings = np.load(f'{output_path}advanced_recipe_embeddings_{self.max_recipes}.npy')
+            # load recipes dataframe
+            with open(f'{output_path}advanced_filtered_recipes_{self.max_recipes}.pkl', 'rb') as f:
+                self.recipes_df = pickle.load(f)
+            # load recipe statistics
+            with open(f'{output_path}recipe_statistics_{self.max_recipes}.pkl', 'rb') as f:
+                self.recipe_stats = pickle.load(f)
+            self.is_ready = True
+        except Exception as e:
+            print(f"Error initializing search system: {e}")
+            self.is_ready = False
+
+    def ensure_files_exist(self, file_paths, output_path):
+        # Create output directory if it doesn't exist
+        os.makedirs(output_path, exist_ok=True)
+
+        file_mapping = {
+            'recipe_embeddings': f'advanced_recipe_embeddings_{self.max_recipes}.npy',
+            'recipes_df': f'advanced_filtered_recipes_{self.max_recipes}.pkl',
+            'recipe_stats': f'recipe_statistics_{self.max_recipes}.pkl',
+            'model': f'tag_based_bert_model.pth'
+        }
 
+        for key, local_filename in file_mapping.items():
+            local_path = os.path.join(output_path, local_filename)
+            if not os.path.exists(local_path):
+                print(f"Downloading {local_filename}...")
+                gdown.download(file_paths[key], local_path, quiet=False, fuzzy=True)
+                print(f"Downloaded {local_filename}")
+            else:
+                print(f"{local_filename} already exists, skipping download")
 
     def create_query_embedding(self, user_query):

@@ -145,7 +185,6 @@ class RecipeSearchSystem:
             'unique_users': int(unique_users)
         }
 
-        result = json.dumps(result)
         return result
 
     def search_recipes(self, user_query, top_k=5, min_rating=3.0, min_num_ratings=5):

@@ -177,16 +216,8 @@
         return final_results
 
 
-def search_for_recipes(
-
-    results = search_system.search_recipes(
-        user_query=user_query,
-        top_k=top_k,
-        min_rating=min_rating,
-        min_num_ratings=min_num_ratings
-    )
-
-    return results
+def search_for_recipes():
+    return RecipeSearchSystem()
 
 
 if __name__ == "__main__":
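Editor's note: the renamed module is consumed as in the `pages/` diff below. A usage sketch, assuming the Drive assets are reachable; the result schema comes from `search_recipes`, whose body is not shown in these hunks:

```python
from model.search_script import search_for_recipes

system = search_for_recipes()  # downloads any missing assets on first use
if system.is_ready:
    results = system.search_recipes(
        user_query="quick vegetarian pasta",  # hypothetical query
        top_k=5,
        min_rating=3.0,
        min_num_ratings=5,
    )
    for recipe in results:
        print(recipe)  # schema defined in search_recipes, not shown in this diff
```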
pages/3_Recipe_Recommendation.py CHANGED
@@ -1,7 +1,7 @@
 from utils.layout import render_layout
 import streamlit as st
 import time
-from model.
+from model.search_script import search_for_recipes  # assumed you modularized this logic
 import streamlit.components.v1 as components
 
 def recipe_search_page():

@@ -15,7 +15,7 @@ def recipe_search_page():
 
     if 'search_system' not in st.session_state:
         with st.spinner("π Initializing recipe search system..."):
-            st.session_state.search_system =
+            st.session_state.search_system = search_for_recipes()
 
     search_system = st.session_state.search_system
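Editor's note: the page now guards construction behind `st.session_state`, so Streamlit reruns within a session don't rebuild the heavy search system; the deleted `recipe_search.py` achieved the same with `@st.cache_resource`. A minimal comparison sketch, not code from this commit (session state is per browser session, while `cache_resource` is shared process-wide):

```python
import streamlit as st
from model.search_script import RecipeSearchSystem

# Pattern A (this commit): explicit session-state guard, one system per session.
if "search_system" not in st.session_state:
    st.session_state.search_system = RecipeSearchSystem()

# Pattern B (the deleted file): cache the constructor once for the whole process.
@st.cache_resource
def load_search_system() -> RecipeSearchSystem:
    return RecipeSearchSystem()
```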
scripts/.DS_Store CHANGED
Binary files a/scripts/.DS_Store and b/scripts/.DS_Store differ

scripts/NLP/.DS_Store ADDED
Binary file (6.15 kB).