azaher1215 committed
Commit · 82b68ff
1 Parent(s): 2660b8c

adding my files AZ
Files changed:
- .DS_Store  +0 -0
- Delete_Later_report.txt  +1 -0
- README.md  +55 -16
- assets/.DS_Store  +0 -0
- config.py  +8 -5
- model/recipe_search.py  +0 -139
- {scripts/NLP → model}/search_script.py  +66 -35
- pages/3_Recipe_Recommendation.py  +2 -2
- scripts/.DS_Store  +0 -0
- scripts/NLP/.DS_Store  +0 -0
.DS_Store ADDED
Binary file (8.2 kB).
Delete_Later_report.txt ADDED
@@ -0,0 +1 @@
+Report section:
README.md CHANGED
@@ -1,19 +1,58 @@
----
-title: Pattern
-emoji: π
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-- streamlit
-pinned: false
-short_description: for pattern recg
----
-
-
-
-
+# CSE 555 Term Project (Computer Vision and Natural Language Processing)
+
+## Overview
+This project is a multi-featured application focused on food image classification, variation detection, recipe recommendation, and reporting. It leverages deep learning and NLP techniques to provide a comprehensive toolkit for food-related data analysis and user interaction.
+
+## Features
+- **Image Classification:** Classify food images using pre-trained models.
+- **Variation Detection:** Detect variations in food items.
+- **Recipe Recommendation:** Recommend recipes based on user input and image analysis.
+- **Report Generation:** Generate reports from classification and recommendation results.
+
+## Project Structure
+```
+PatternRec_Project_Group5/
+├── assets/
+│   ├── css/              # Stylesheets
+│   ├── modelWeights/     # Pre-trained model weights (.pth)
+│   └── nlp/              # NLP data and models (downloaded from Google Drive on first run)
+├── config.py             # Configuration file
+├── Scripts/              # Training scripts
+│   ├── CV/               # CV training script
+│   └── NLP/              # NLP training script
+├── Home.py               # Main entry point (Streamlit)
+├── model/                # Model code (classifier, recipe search)
+├── pages/                # App pages (image classification, variation detection, etc.)
+├── utils/                # Utility functions (layout, etc.)
+└── sakenv/               # Python virtual environment
+```
+
+## Setup Instructions
+1. **Clone the repository:**
+   ```bash
+   git clone <repo-url>
+   cd PatternRec_Project_Group5
+   ```
+2. **Activate the virtual environment (already included as `sakenv/`):**
+   ```bash
+   source sakenv/bin/activate
+   ```
+3. **Install dependencies:**
+   ```bash
+   pip install -r requirements.txt
+   ```
+4. **Run the application:**
+   - If using Streamlit:
+     ```bash
+     streamlit run Home.py
+     ```
+   - Or follow the instructions in `Home.py`.
+
+## Python Version
+- Python 3.12.2
+
+## Notes
+- Model weights are stored in the `assets/` directory.
+- The NLP weights are large and are hosted on Google Drive: [CSE 555 Project Group 5](https://drive.google.com/drive/folders/1m6cfy4NuxIKNDBtJqm150NNN0FSUS8Np)
+- Ensure you have the necessary permissions to access the large files in `assets/modelWeights/` and `assets/nlp/`.
+- For best results, use the provided virtual environment and requirements file.
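Editor's note: the README's claim that `assets/nlp/` is populated from Google Drive on first run maps onto the `GOOGLE_DRIVE_FILES` dict added in `config.py` below. A minimal bootstrap sketch, assuming that dict's local-path to share-link layout; the `fetch_assets` helper itself is hypothetical and not in this commit:

```python
# Hypothetical bootstrap sketch: download any Google Drive asset that is
# missing locally before the app starts. GOOGLE_DRIVE_FILES (see config.py
# in this commit) maps local paths to shareable Drive links; gdown's
# fuzzy=True accepts the .../view?usp=drive_link URL form directly.
import os
import gdown
from config import GOOGLE_DRIVE_FILES

def fetch_assets() -> None:
    for local_path, drive_url in GOOGLE_DRIVE_FILES.items():
        if os.path.exists(local_path):
            continue  # already downloaded on a previous run
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        gdown.download(drive_url, local_path, quiet=False, fuzzy=True)

if __name__ == "__main__":
    fetch_assets()
```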
assets/.DS_Store CHANGED
Binary files a/assets/.DS_Store and b/assets/.DS_Store differ
config.py CHANGED
@@ -7,9 +7,12 @@ MODEL_PATH_TOMATO = "assets/modelWeights/best_model_tomato_v1.pth"
 MODEL_PATH_STRAWBERRY = "assets/modelWeights/best_model_strawberry_v1.pth"
 
 GOOGLE_DRIVE_FILES = {
-    'assets/nlp/
-    'assets/nlp/
-    'assets/nlp/
-    'assets/nlp/recipe_statistics_231630.pkl': '
-    'assets/nlp/
+    'assets/nlp/tag_based_bert_model.pth': 'https://drive.google.com/file/d/1CK6es61w4vIPZoMYk_x8lk6zS8v339h7/view?usp=drive_link',
+    'assets/nlp/RAW_recipes.csv': 'https://drive.google.com/file/d/1n69aIEzP1wO-2_NcnKy0KFQfkanaVNV8/view?usp=drive_link',
+    'assets/nlp/RAW_interactions.csv': 'https://drive.google.com/file/d/11WnvKR5P_Bl66Cy2CTKMrIUXvL497q7d/view?usp=drive_link',
+    'assets/nlp/recipe_statistics_231630.pkl': 'https://drive.google.com/file/d/1nTtpaiaY52wKZs8KWehUM70_O0rmPBmO/view?usp=drive_link',
+    'assets/nlp/advanced_recipe_embeddings_231630.npy': 'https://drive.google.com/file/d/1aCzBIdKcyB94qHjz14PnxmbxvD49DBSs/view?usp=drive_link',
+    'assets/nlp/advanced_filtered_recipes_231630.pkl': 'https://drive.google.com/file/d/1SwEmVjoVDrWD43CYynRT99EFRJepzapb/view?usp=drive_link',
+    'assets/nlp/pair_data.parquet': 'https://drive.google.com/file/d/1bs2s6xBFFQHFiVvZDGHfH91H5H-Np9Fa/view?usp=drive_link'
 }
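Editor's note: the old entries (truncated above) apparently stored bare file IDs that the deleted `model/recipe_search.py` (below) turned into `uc?id=` URLs, while the new entries store full share links and rely on `gdown`'s fuzzy parsing. A small sketch of the two call styles, using the `tag_based_bert_model.pth` ID from this hunk and an arbitrary output filename:

```python
import gdown

file_id = "1CK6es61w4vIPZoMYk_x8lk6zS8v339h7"  # tag_based_bert_model.pth, from the hunk above
share_link = f"https://drive.google.com/file/d/{file_id}/view?usp=drive_link"

# Old style (deleted recipe_search.py): build an explicit uc?id= download URL.
gdown.download(f"https://drive.google.com/uc?id={file_id}", "model.pth", quiet=False)

# New style (search_script.py): hand gdown the share link and let fuzzy=True
# extract the file ID itself.
gdown.download(share_link, "model.pth", quiet=False, fuzzy=True)
```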
model/recipe_search.py DELETED
@@ -1,139 +0,0 @@
-import os
-import csv
-import ast
-import pickle
-import gdown
-import torch
-import torch.nn.functional as F
-import streamlit as st
-from transformers import BertTokenizer, BertModel
-from config import GOOGLE_DRIVE_FILES
-
-
-def download_file_from_drive(file_id: str, destination: str, file_name: str) -> bool:
-    try:
-        with st.spinner(f"Downloading {file_name}..."):
-            url = f"https://drive.google.com/uc?id={file_id}"
-            gdown.download(url, destination, quiet=False)
-        return True
-    except Exception as e:
-        st.error(f"Failed to download {file_name}: {e}")
-        return False
-
-def ensure_files_downloaded():
-    for filename, file_id in GOOGLE_DRIVE_FILES.items():
-        if not os.path.exists(filename):
-            success = download_file_from_drive(file_id, filename, filename)
-            if not success:
-                return False
-    return True
-
-class GoogleDriveRecipeSearch:
-    def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-        if not ensure_files_downloaded():
-            self.is_ready = False
-            return
-
-        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-        self.model = BertModel.from_pretrained("bert-base-uncased")
-
-        if os.path.exists("assets/nlp/tag_based_bert_model.pth"):
-            self.model.load_state_dict(
-                torch.load("assets/nlp/tag_based_bert_model.pth", map_location=self.device)
-            )
-            st.success("Trained model loaded successfully!")
-        else:
-            st.warning("Using untrained model")
-
-        self.model.to(self.device)
-        self.model.eval()
-
-        self.load_data()
-        self.is_ready = True
-
-    def load_data(self):
-        self.recipe_embeddings = torch.load("assets/nlp/torch_recipe_embeddings_231630.pt", map_location=self.device)
-        self.recipes = self._load_recipes("assets/nlp/RAW_recipes.csv")
-        self.recipe_stats = pickle.load(open("assets/nlp/recipe_statistics_231630.pkl", "rb"))
-        self.recipe_scores = pickle.load(open("assets/nlp/recipe_scores_231630.pkl", "rb"))
-
-    def _load_recipes(self, path):
-        recipes = []
-        with open(path, "r", encoding="utf-8") as file:
-            reader = csv.DictReader(file)
-            for idx, row in enumerate(reader):
-                name = row.get("name", "").strip()
-                if not name or name.lower() in ["nan", "unknown recipe"]:
-                    continue
-                try:
-                    recipe = {
-                        "id": int(row.get("id", idx)),
-                        "name": name,
-                        "ingredients": ast.literal_eval(row.get("ingredients", "[]")),
-                        "tags": ast.literal_eval(row.get("tags", "[]")),
-                        "minutes": int(float(row.get("minutes", 0))),
-                        "n_steps": int(float(row.get("n_steps", 0))),
-                        "description": row.get("description", ""),
-                        "steps": ast.literal_eval(row.get("steps", "[]"))
-                    }
-                    recipes.append(recipe)
-                except:
-                    continue
-        return recipes
-
-    def search_recipes(self, query, num_results=5, min_rating=3.0):
-        if not query.strip():
-            return []
-        print('im here')
-
-        tokens = self.tokenizer(query, return_tensors="pt", truncation=True, padding=True)
-        tokens = {k: v.to(self.device) for k, v in tokens.items()}
-
-        with torch.no_grad():
-            outputs = self.model(**tokens)
-            query_embedding = outputs.last_hidden_state[:, 0, :]
-
-        query_embedding = F.normalize(query_embedding, dim=1)
-        recipe_embeddings = F.normalize(self.recipe_embeddings, dim=1)
-
-        similarity_scores = torch.matmul(recipe_embeddings, query_embedding.T).squeeze()
-
-        final_scores = []
-        for i in range(len(self.recipe_embeddings)):
-            recipe = self.recipes[i]
-            avg_rating, num_ratings, *_ = self.recipe_stats.get(recipe["id"], (0.0, 0, 0))
-            if avg_rating < min_rating or num_ratings < 2:
-                continue
-            combined_score = (
-                0.6 * similarity_scores[i].item() +
-                0.4 * self.recipe_scores.get(recipe["id"], 0)
-            )
-            final_scores.append((combined_score, i))
-
-        top_matches = sorted(final_scores, key=lambda x: x[0], reverse=True)[:num_results]
-
-        results = []
-        for score, idx in top_matches:
-            recipe = self.recipes[idx]
-            avg_rating, num_ratings, *_ = self.recipe_stats.get(recipe["id"], (0.0, 0, 0))
-            results.append({
-                "name": recipe["name"],
-                "tags": recipe.get("tags", []),
-                "ingredients": recipe.get("ingredients", []),
-                "minutes": recipe.get("minutes", 0),
-                "n_steps": recipe.get("n_steps", 0),
-                "avg_rating": avg_rating,
-                "num_ratings": num_ratings,
-                "similarity_score": similarity_scores[idx].item(),
-                "combined_score": score,
-                "steps": recipe.get("steps", []),
-                "description": recipe.get("description", "")
-            })
-
-        return results
-
-@st.cache_resource
-def load_search_system():
-    return GoogleDriveRecipeSearch()
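Editor's note: before the rename below, this deleted file carried the core ranking math: cosine similarity computed as a matmul of L2-normalized embeddings, blended 60/40 with a precomputed popularity score. A self-contained toy sketch of that computation, with random tensors standing in for the real BERT embeddings and `recipe_scores`:

```python
import torch
import torch.nn.functional as F

# Toy stand-ins: one 768-dim query embedding, five recipe embeddings.
query = F.normalize(torch.randn(1, 768), dim=1)
recipes = F.normalize(torch.randn(5, 768), dim=1)

# After L2 normalization, the matmul yields cosine similarity per recipe.
similarity = (recipes @ query.T).squeeze()

# Blend with a popularity prior, using the 0.6/0.4 weights from the deleted code.
popularity = torch.rand(5)
combined = 0.6 * similarity + 0.4 * popularity

# Top-3 recipe indices by blended score.
print(torch.topk(combined, k=3).indices)
```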
{scripts/NLP → model}/search_script.py RENAMED
@@ -3,33 +3,73 @@ import numpy as np
 from transformers import BertTokenizer, BertModel
 import pickle
 import json
+import gdown
+import os
+import sys
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+from config import GOOGLE_DRIVE_FILES
+
+
 class RecipeSearchSystem:
 
-    def __init__(self,
-
-
-
-        self.
-
-        self.
+    def __init__(self, max_recipes=231630):
+        try:
+            # Load all the preprocessed files
+            self.max_recipes = max_recipes
+            file_paths = {
+                'recipe_embeddings': GOOGLE_DRIVE_FILES['assets/nlp/advanced_recipe_embeddings_231630.npy'],
+                'recipes_df': GOOGLE_DRIVE_FILES['assets/nlp/advanced_filtered_recipes_231630.pkl'],
+                'recipe_stats': GOOGLE_DRIVE_FILES['assets/nlp/recipe_statistics_231630.pkl'],
+                'model': GOOGLE_DRIVE_FILES['assets/nlp/tag_based_bert_model.pth']
+            }
+            output_path = "assets/nlp/"
+            # download files from google drive
+            self.ensure_files_exist(file_paths, output_path)
+            # Set up device
+            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+            # Load tokenizer
+            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+
+            # Load the trained model
+            self.model = BertModel.from_pretrained('bert-base-uncased')
+            self.model.load_state_dict(torch.load(f'{output_path}tag_based_bert_model.pth', map_location=self.device))
+
+            self.model.to(self.device)
+            self.model.eval()
+
+            # load recipe embeddings
+            self.recipe_embeddings = np.load(f'{output_path}advanced_recipe_embeddings_{self.max_recipes}.npy')
+            # load recipes dataframe
+            with open(f'{output_path}advanced_filtered_recipes_{self.max_recipes}.pkl', 'rb') as f:
+                self.recipes_df = pickle.load(f)
+            # load recipe statistics
+            with open(f'{output_path}recipe_statistics_{self.max_recipes}.pkl', 'rb') as f:
+                self.recipe_stats = pickle.load(f)
+            self.is_ready = True
+        except Exception as e:
+            print(f"Error initializing search system: {e}")
+            self.is_ready = False
+
+    def ensure_files_exist(self, file_paths, output_path):
+        # Create output directory if it doesn't exist
+        os.makedirs(output_path, exist_ok=True)
+
+        file_mapping = {
+            'recipe_embeddings': f'advanced_recipe_embeddings_{self.max_recipes}.npy',
+            'recipes_df': f'advanced_filtered_recipes_{self.max_recipes}.pkl',
+            'recipe_stats': f'recipe_statistics_{self.max_recipes}.pkl',
+            'model': f'tag_based_bert_model.pth'
+        }
 
+        for key, local_filename in file_mapping.items():
+            local_path = os.path.join(output_path, local_filename)
+            if not os.path.exists(local_path):
+                print(f"Downloading {local_filename}...")
+                gdown.download(file_paths[key], local_path, quiet=False, fuzzy=True)
+                print(f"Downloaded {local_filename}")
+            else:
+                print(f"{local_filename} already exists, skipping download")
 
     def create_query_embedding(self, user_query):

@@ -145,7 +185,6 @@ class RecipeSearchSystem:
             'unique_users': int(unique_users)
         }
 
-        result = json.dumps(result)
         return result
 
     def search_recipes(self, user_query, top_k=5, min_rating=3.0, min_num_ratings=5):

@@ -177,16 +216,8 @@
         return final_results
 
 
-def search_for_recipes(
-
-    results = search_system.search_recipes(
-        user_query=user_query,
-        top_k=top_k,
-        min_rating=min_rating,
-        min_num_ratings=min_num_ratings
-    )
-
-    return results
+def search_for_recipes():
+    return RecipeSearchSystem()
 
 
 if __name__ == "__main__":
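Editor's note: the renamed module is consumed as in the `pages/` diff below. A usage sketch, assuming the Drive assets are reachable; the result schema comes from `search_recipes`, whose body is not shown in these hunks:

```python
from model.search_script import search_for_recipes

system = search_for_recipes()  # downloads any missing assets on first use
if system.is_ready:
    results = system.search_recipes(
        user_query="quick vegetarian pasta",  # hypothetical query
        top_k=5,
        min_rating=3.0,
        min_num_ratings=5,
    )
    for recipe in results:
        print(recipe)  # schema defined in search_recipes, not shown in this diff
```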
pages/3_Recipe_Recommendation.py CHANGED
@@ -1,7 +1,7 @@
 from utils.layout import render_layout
 import streamlit as st
 import time
-from model.
+from model.search_script import search_for_recipes  # assumed you modularized this logic
 import streamlit.components.v1 as components
 
 def recipe_search_page():

@@ -15,7 +15,7 @@ def recipe_search_page():
 
     if 'search_system' not in st.session_state:
         with st.spinner("π Initializing recipe search system..."):
-            st.session_state.search_system =
+            st.session_state.search_system = search_for_recipes()
 
     search_system = st.session_state.search_system
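Editor's note: the page now guards construction behind `st.session_state`, so Streamlit reruns within a session don't rebuild the heavy search system; the deleted `recipe_search.py` achieved the same with `@st.cache_resource`. A minimal comparison sketch, not code from this commit (session state is per browser session, while `cache_resource` is shared process-wide):

```python
import streamlit as st
from model.search_script import RecipeSearchSystem

# Pattern A (this commit): explicit session-state guard, one system per session.
if "search_system" not in st.session_state:
    st.session_state.search_system = RecipeSearchSystem()

# Pattern B (the deleted file): cache the constructor once for the whole process.
@st.cache_resource
def load_search_system() -> RecipeSearchSystem:
    return RecipeSearchSystem()
```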
scripts/.DS_Store CHANGED
Binary files a/scripts/.DS_Store and b/scripts/.DS_Store differ

scripts/NLP/.DS_Store ADDED
Binary file (6.15 kB).