azaher1215 committed
Commit 82b68ff · 1 Parent(s): 2660b8c

adding my files AZ

.DS_Store ADDED
Binary file (8.2 kB).
 
Delete_Later_report.txt ADDED
@@ -0,0 +1 @@
+ Report section:
README.md CHANGED
@@ -1,19 +1,58 @@
- ---
- title: Pattern
- emoji: 🚀
- colorFrom: red
- colorTo: red
- sdk: docker
- app_port: 8501
- tags:
- - streamlit
- pinned: false
- short_description: for pattern recg
- ---
-
- # Welcome to Streamlit!
-
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
+ # CSE 555 Term Project (Computer Vision and Natural Language Processing)
+
+ ## Overview
+ This project is a multi-featured application for food image classification, variation detection, recipe recommendation, and reporting. It combines deep learning and NLP techniques into a single toolkit for food-related data analysis and user interaction.
+
+ ## Features
+ - **Image Classification:** Classify food images using pre-trained models.
+ - **Variation Detection:** Detect variations in food items.
+ - **Recipe Recommendation:** Recommend recipes based on user input and image analysis.
+ - **Report Generation:** Generate reports based on classification and recommendation results.
+
+ ## Project Structure
+ ```
+ PatternRec_Project_Group5/
+ ├── assets/
+ │   ├── css/             # Stylesheets
+ │   ├── modelWeights/    # Pre-trained model weights (.pth)
+ │   └── nlp/             # NLP data and models (downloaded from Google Drive when the app first runs)
+ ├── config.py            # Configuration file
+ ├── scripts/             # Training scripts
+ │   ├── CV/              # CV training script
+ │   └── NLP/             # NLP training script
+ ├── Home.py              # Main entry point (Streamlit)
+ ├── model/               # Model code (classifier, recipe search)
+ ├── pages/               # App pages (image classification, variation detection, etc.)
+ ├── utils/               # Utility functions (layout, etc.)
+ ├── sakenv/              # Python virtual environment
+ ```
+
+ ## Setup Instructions
+ 1. **Clone the repository:**
+    ```bash
+    git clone <repo-url>
+    cd PatternRec_Project_Group5
+    ```
+ 2. **Activate the virtual environment (already included as `sakenv/`):**
+    ```bash
+    source sakenv/bin/activate
+    ```
+ 3. **Install dependencies:**
+    ```bash
+    pip install -r requirements.txt
+    ```
+ 4. **Run the application (Streamlit):**
+    ```bash
+    streamlit run Home.py
+    ```
+
+ ## Python Version
+ - Python 3.12.2
+
+ ## Notes
+ - Model weights are stored in the `assets/` directory.
+ - The NLP weights are too large for the repository and are hosted at [CSE 555 Project Group 5](https://drive.google.com/drive/folders/1m6cfy4NuxIKNDBtJqm150NNN0FSUS8Np).
+ - Ensure you have the necessary permissions to access the large files in `assets/modelWeights/` and `assets/nlp/`.
+ - For best results, use the provided virtual environment and requirements file.
assets/.DS_Store CHANGED
Binary files a/assets/.DS_Store and b/assets/.DS_Store differ
 
config.py CHANGED
@@ -7,9 +7,12 @@ MODEL_PATH_TOMATO = "assets/modelWeights/best_model_tomato_v1.pth"
  MODEL_PATH_STRAWBERRY = "assets/modelWeights/best_model_strawberry_v1.pth"
 
  GOOGLE_DRIVE_FILES = {
-     'assets/nlp/torch_recipe_embeddings_231630.pt': '1PSidY1toSfgECXDxa4pGza56Jq6vOq6t',
-     'assets/nlp/tag_based_bert_model.pth': '1LBl7yFs5JFqOsgfn88BF9g83W9mxiBm6',
-     'assets/nlp/RAW_recipes.csv': '1rFJQzg_ErwEpN6WmhQ4jRyiXv6JCINyf',
-     'assets/nlp/recipe_statistics_231630.pkl': '1n8TNT-6EA_usv59CCCU1IXqtuM7i084E',
-     'assets/nlp/recipe_scores_231630.pkl': '1gfPBzghKHOZqgJu4VE9NkandAd6FGjrA'
+     'assets/nlp/tag_based_bert_model.pth': 'https://drive.google.com/file/d/1CK6es61w4vIPZoMYk_x8lk6zS8v339h7/view?usp=drive_link',
+     'assets/nlp/RAW_recipes.csv': 'https://drive.google.com/file/d/1n69aIEzP1wO-2_NcnKy0KFQfkanaVNV8/view?usp=drive_link',
+     'assets/nlp/RAW_interactions.csv': 'https://drive.google.com/file/d/11WnvKR5P_Bl66Cy2CTKMrIUXvL497q7d/view?usp=drive_link',
+     'assets/nlp/recipe_statistics_231630.pkl': 'https://drive.google.com/file/d/1nTtpaiaY52wKZs8KWehUM70_O0rmPBmO/view?usp=drive_link',
+     'assets/nlp/advanced_recipe_embeddings_231630.npy': 'https://drive.google.com/file/d/1aCzBIdKcyB94qHjz14PnxmbxvD49DBSs/view?usp=drive_link',
+     'assets/nlp/advanced_filtered_recipes_231630.pkl': 'https://drive.google.com/file/d/1SwEmVjoVDrWD43CYynRT99EFRJepzapb/view?usp=drive_link',
+     'assets/nlp/pair_data.parquet': 'https://drive.google.com/file/d/1bs2s6xBFFQHFiVvZDGHfH91H5H-Np9Fa/view?usp=drive_link'
+
  }
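Note the shape change in this hunk: the old mapping held bare Drive file IDs, while the new one holds full share URLs. gdown can consume either form, but full URLs need `fuzzy=True` so the file ID is extracted from the link, which is how the renamed search script below calls it. A minimal sketch of that download pattern (the helper name `fetch_if_missing` is illustrative, not part of the commit):

```python
import os

import gdown

from config import GOOGLE_DRIVE_FILES


def fetch_if_missing(rel_path: str) -> str:
    """Download one mapped asset from Google Drive unless it already exists locally."""
    os.makedirs(os.path.dirname(rel_path), exist_ok=True)
    if not os.path.exists(rel_path):
        # fuzzy=True lets gdown pull the file ID out of a full
        # .../file/d/<id>/view?usp=drive_link share URL.
        gdown.download(GOOGLE_DRIVE_FILES[rel_path], rel_path, quiet=False, fuzzy=True)
    return rel_path


# Example: make sure the raw recipes CSV is present before loading it.
fetch_if_missing('assets/nlp/RAW_recipes.csv')
```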
model/recipe_search.py DELETED
@@ -1,139 +0,0 @@
- import os
- import csv
- import ast
- import pickle
- import gdown
- import torch
- import torch.nn.functional as F
- import streamlit as st
- from transformers import BertTokenizer, BertModel
- from config import GOOGLE_DRIVE_FILES
-
-
- def download_file_from_drive(file_id: str, destination: str, file_name: str) -> bool:
-     try:
-         with st.spinner(f"Downloading {file_name}..."):
-             url = f"https://drive.google.com/uc?id={file_id}"
-             gdown.download(url, destination, quiet=False)
-         return True
-     except Exception as e:
-         st.error(f"Failed to download {file_name}: {e}")
-         return False
-
- def ensure_files_downloaded():
-     for filename, file_id in GOOGLE_DRIVE_FILES.items():
-         if not os.path.exists(filename):
-             success = download_file_from_drive(file_id, filename, filename)
-             if not success:
-                 return False
-     return True
-
- class GoogleDriveRecipeSearch:
-     def __init__(self):
-         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-         if not ensure_files_downloaded():
-             self.is_ready = False
-             return
-
-         self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-         self.model = BertModel.from_pretrained("bert-base-uncased")
-
-         if os.path.exists("assets/nlp/tag_based_bert_model.pth"):
-             self.model.load_state_dict(
-                 torch.load("assets/nlp/tag_based_bert_model.pth", map_location=self.device)
-             )
-             st.success("Trained model loaded successfully!")
-         else:
-             st.warning("Using untrained model")
-
-         self.model.to(self.device)
-         self.model.eval()
-
-         self.load_data()
-         self.is_ready = True
-
-     def load_data(self):
-         self.recipe_embeddings = torch.load("assets/nlp/torch_recipe_embeddings_231630.pt", map_location=self.device)
-         self.recipes = self._load_recipes("assets/nlp/RAW_recipes.csv")
-         self.recipe_stats = pickle.load(open("assets/nlp/recipe_statistics_231630.pkl", "rb"))
-         self.recipe_scores = pickle.load(open("assets/nlp/recipe_scores_231630.pkl", "rb"))
-
-     def _load_recipes(self, path):
-         recipes = []
-         with open(path, "r", encoding="utf-8") as file:
-             reader = csv.DictReader(file)
-             for idx, row in enumerate(reader):
-                 name = row.get("name", "").strip()
-                 if not name or name.lower() in ["nan", "unknown recipe"]:
-                     continue
-                 try:
-                     recipe = {
-                         "id": int(row.get("id", idx)),
-                         "name": name,
-                         "ingredients": ast.literal_eval(row.get("ingredients", "[]")),
-                         "tags": ast.literal_eval(row.get("tags", "[]")),
-                         "minutes": int(float(row.get("minutes", 0))),
-                         "n_steps": int(float(row.get("n_steps", 0))),
-                         "description": row.get("description", ""),
-                         "steps": ast.literal_eval(row.get("steps", "[]"))
-                     }
-                     recipes.append(recipe)
-                 except:
-                     continue
-         return recipes
-
-     def search_recipes(self, query, num_results=5, min_rating=3.0):
-         if not query.strip():
-             return []
-         print('im here')
-
-         tokens = self.tokenizer(query, return_tensors="pt", truncation=True, padding=True)
-         tokens = {k: v.to(self.device) for k, v in tokens.items()}
-
-         with torch.no_grad():
-             outputs = self.model(**tokens)
-             query_embedding = outputs.last_hidden_state[:, 0, :]
-
-         query_embedding = F.normalize(query_embedding, dim=1)
-         recipe_embeddings = F.normalize(self.recipe_embeddings, dim=1)
-
-         similarity_scores = torch.matmul(recipe_embeddings, query_embedding.T).squeeze()
-
-         final_scores = []
-         for i in range(len(self.recipe_embeddings)):
-             recipe = self.recipes[i]
-             avg_rating, num_ratings, *_ = self.recipe_stats.get(recipe["id"], (0.0, 0, 0))
-             if avg_rating < min_rating or num_ratings < 2:
-                 continue
-             combined_score = (
-                 0.6 * similarity_scores[i].item() +
-                 0.4 * self.recipe_scores.get(recipe["id"], 0)
-             )
-             final_scores.append((combined_score, i))
-
-         top_matches = sorted(final_scores, key=lambda x: x[0], reverse=True)[:num_results]
-
-         results = []
-         for score, idx in top_matches:
-             recipe = self.recipes[idx]
-             avg_rating, num_ratings, *_ = self.recipe_stats.get(recipe["id"], (0.0, 0, 0))
-             results.append({
-                 "name": recipe["name"],
-                 "tags": recipe.get("tags", []),
-                 "ingredients": recipe.get("ingredients", []),
-                 "minutes": recipe.get("minutes", 0),
-                 "n_steps": recipe.get("n_steps", 0),
-                 "avg_rating": avg_rating,
-                 "num_ratings": num_ratings,
-                 "similarity_score": similarity_scores[idx].item(),
-                 "combined_score": score,
-                 "steps": recipe.get("steps", []),
-                 "description": recipe.get("description", "")
-             })
-
-         return results
-
- @st.cache_resource
- def load_search_system():
-     return GoogleDriveRecipeSearch()
{scripts/NLP β†’ model}/search_script.py RENAMED
@@ -3,33 +3,73 @@ import numpy as np
  from transformers import BertTokenizer, BertModel
  import pickle
  import json
+ import gdown
+ import os
+ import sys
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+ from config import GOOGLE_DRIVE_FILES
+
+
  class RecipeSearchSystem:
 
-     def __init__(self, model_path='tag_based_bert_model.pth', max_recipes=231630):
-         # Set up device
-         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-         # Load tokenizer
-         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-
-         # Load the trained model
-         self.model = BertModel.from_pretrained('bert-base-uncased')
-         self.model.load_state_dict(torch.load(model_path, map_location=self.device))
-
-         self.model.to(self.device)
-         self.model.eval()
-
-         # Load all the preprocessed files
-         self.max_recipes = max_recipes
-         # load recipe embeddings
-         self.recipe_embeddings = np.load(f'advanced_recipe_embeddings_{self.max_recipes}.npy')
-         # load recipes dataframe
-         with open(f'advanced_filtered_recipes_{self.max_recipes}.pkl', 'rb') as f:
-             self.recipes_df = pickle.load(f)
-         # load recipe statistics
-         with open(f'recipe_statistics_{self.max_recipes}.pkl', 'rb') as f:
-             self.recipe_stats = pickle.load(f)
+     def __init__(self, max_recipes=231630):
+         try:
+             # Load all the preprocessed files
+             self.max_recipes = max_recipes
+             file_paths = {
+                 'recipe_embeddings': GOOGLE_DRIVE_FILES['assets/nlp/advanced_recipe_embeddings_231630.npy'],
+                 'recipes_df': GOOGLE_DRIVE_FILES['assets/nlp/advanced_filtered_recipes_231630.pkl'],
+                 'recipe_stats': GOOGLE_DRIVE_FILES['assets/nlp/recipe_statistics_231630.pkl'],
+                 'model': GOOGLE_DRIVE_FILES['assets/nlp/tag_based_bert_model.pth']
+             }
+             output_path = "assets/nlp/"
+             # download files from google drive
+             self.ensure_files_exist(file_paths, output_path)
+             # Set up device
+             self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+             # Load tokenizer
+             self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+
+             # Load the trained model
+             self.model = BertModel.from_pretrained('bert-base-uncased')
+             self.model.load_state_dict(torch.load(f'{output_path}tag_based_bert_model.pth', map_location=self.device))
+
+             self.model.to(self.device)
+             self.model.eval()
+
+             # load recipe embeddings
+             self.recipe_embeddings = np.load(f'{output_path}advanced_recipe_embeddings_{self.max_recipes}.npy')
+             # load recipes dataframe
+             with open(f'{output_path}advanced_filtered_recipes_{self.max_recipes}.pkl', 'rb') as f:
+                 self.recipes_df = pickle.load(f)
+             # load recipe statistics
+             with open(f'{output_path}recipe_statistics_{self.max_recipes}.pkl', 'rb') as f:
+                 self.recipe_stats = pickle.load(f)
+             self.is_ready = True
+         except Exception as e:
+             print(f"Error initializing search system: {e}")
+             self.is_ready = False
+
+     def ensure_files_exist(self, file_paths, output_path):
+         # Create output directory if it doesn't exist
+         os.makedirs(output_path, exist_ok=True)
+
+         file_mapping = {
+             'recipe_embeddings': f'advanced_recipe_embeddings_{self.max_recipes}.npy',
+             'recipes_df': f'advanced_filtered_recipes_{self.max_recipes}.pkl',
+             'recipe_stats': f'recipe_statistics_{self.max_recipes}.pkl',
+             'model': f'tag_based_bert_model.pth'
+         }
+
+         for key, local_filename in file_mapping.items():
+             local_path = os.path.join(output_path, local_filename)
+             if not os.path.exists(local_path):
+                 print(f"Downloading {local_filename}...")
+                 gdown.download(file_paths[key], local_path, quiet=False, fuzzy=True)
+                 print(f"Downloaded {local_filename}")
+             else:
+                 print(f"{local_filename} already exists, skipping download")
 
      def create_query_embedding(self, user_query):

@@ -145,7 +185,6 @@ class RecipeSearchSystem:
              'unique_users': int(unique_users)
          }
 
-         result = json.dumps(result)
          return result
 
      def search_recipes(self, user_query, top_k=5, min_rating=3.0, min_num_ratings=5):

@@ -177,16 +216,8 @@ class RecipeSearchSystem:
          return final_results
 
 
- def search_for_recipes(user_query, top_k=5, min_rating=3.0, min_num_ratings=5):
-     search_system = RecipeSearchSystem()
-     results = search_system.search_recipes(
-         user_query=user_query,
-         top_k=top_k,
-         min_rating=min_rating,
-         min_num_ratings=min_num_ratings
-     )
-
-     return results
+ def search_for_recipes():
+     return RecipeSearchSystem()
 
 
  if __name__ == "__main__":
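With the rename, `search_for_recipes()` becomes a plain factory that returns the `RecipeSearchSystem` instance; the filtering parameters now go to `search_recipes` on the returned object, and initialization failures surface through `is_ready` rather than an exception. A hedged usage sketch against the signatures shown above (the query string is illustrative):

```python
from model.search_script import search_for_recipes

# Construct once; __init__ downloads any missing assets and sets
# is_ready = False instead of raising when something goes wrong.
system = search_for_recipes()

if system.is_ready:
    results = system.search_recipes(
        user_query="quick vegetarian pasta",  # illustrative query
        top_k=5,
        min_rating=3.0,
        min_num_ratings=5,
    )
    for recipe in results:
        print(recipe)
else:
    print("Search system failed to initialize; check the download logs above.")
```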
pages/3_Recipe_Recommendation.py CHANGED
@@ -1,7 +1,7 @@
  from utils.layout import render_layout
  import streamlit as st
  import time
- from model.recipe_search import load_search_system  # assumed you modularized this logic
+ from model.search_script import search_for_recipes  # assumed you modularized this logic
  import streamlit.components.v1 as components
 
  def recipe_search_page():

@@ -15,7 +15,7 @@ def recipe_search_page():
 
      if 'search_system' not in st.session_state:
          with st.spinner("🔄 Initializing recipe search system..."):
-             st.session_state.search_system = load_search_system()
+             st.session_state.search_system = search_for_recipes()
 
      search_system = st.session_state.search_system
scripts/.DS_Store CHANGED
Binary files a/scripts/.DS_Store and b/scripts/.DS_Store differ
 
scripts/NLP/.DS_Store ADDED
Binary file (6.15 kB).