azaher1215 committed on
Commit
05b9293
·
1 Parent(s): a306fec

final report additions

Browse files
Home.py CHANGED
@@ -9,16 +9,14 @@ render_header()
9
 
10
  st.markdown("""
11
  <div class="about-box">
12
- Welcome to our Smart Kitchen Assistant — a CSE555 Final Project developed by Group 5 (Saksham & Ahmed).
13
- <br><br>
14
- 🔍 This tool leverages AI to assist in:
15
- - Classifying images of vegetables and fruits.
16
- - Detecting their variations (cut, whole, sliced).
17
- - Recommending recipes based on natural language input.
18
  </div>
19
 
20
- ### 🔗 Use the left sidebar to navigate between:
21
- - 🥦 Task A: Classification
22
- - 🧊 Task B: Variation Detection
23
- - 🧠 NLP Recipe Recommendation
24
  """, unsafe_allow_html=True)
 
9
 
10
  st.markdown("""
11
  <div class="about-box">
12
+ This tool leverages AI to assist in:<br>
13
+ - Classifying images of vegetables and fruits.<br>
14
+ - Detecting their variations (cut, whole, sliced).<br>
15
+ - Recommending recipes based on natural language input.<br>
 
 
16
  </div>
17
 
18
+ ### Use the left sidebar to navigate between:
19
+ - Task A: Classification - upload an image of a vegetable or fruit to classify it.
20
+ - Task B: Variation Detection - upload an image of a vegetable or fruit to detect its variation.
21
+ - NLP Recipe Recommendation - enter a search query to recommend a recipe.
22
  """, unsafe_allow_html=True)
model/search_script.py CHANGED
@@ -228,7 +228,7 @@ if __name__ == "__main__":
228
  # "chocolate cake dessert brownie baked healthy",
229
  # "healthy vegetarian salad tomato basil",
230
  # "quick easy dinner",
231
- # "beef steak",
232
  "beef pasta",
233
  "beef"
234
  ]
 
228
  # "chocolate cake dessert brownie baked healthy",
229
  # "healthy vegetarian salad tomato basil",
230
  # "quick easy dinner",
231
+ "beef steak",
232
  "beef pasta",
233
  "beef"
234
  ]
pages/3_Recipe_Recommendation.py CHANGED
@@ -1,7 +1,7 @@
1
  from utils.layout import render_layout
2
  import streamlit as st
3
  import time
4
- from model.search_script import search_for_recipes # assumed you modularized this logic
5
  import streamlit.components.v1 as components
6
 
7
  def recipe_search_page():
@@ -9,7 +9,7 @@ def recipe_search_page():
9
  ## Advanced Recipe Recommendation
10
  <div class="about-box">
11
  This module uses a custom-trained BERT model to semantically search recipes
12
- based on your query, ingredients, and tags.
13
  </div>
14
  """, unsafe_allow_html=True)
15
 
@@ -25,7 +25,7 @@ def recipe_search_page():
25
 
26
  query = st.text_input(
27
  "Search for recipes:",
28
- placeholder="e.g., 'chicken pasta', 'vegetarian salad', 'chocolate dessert', 'quick easy "
29
  )
30
 
31
  col1, col2 = st.columns(2)
@@ -50,36 +50,38 @@ def recipe_search_page():
50
  description = recipe.get("description", "").strip().capitalize()
51
 
52
  html_code = f"""
53
- <div style="margin-bottom: 24px; padding: 16px; border-radius: 12px; background-color: #fdfdfd; box-shadow: 0 2px 8px rgba(0,0,0,0.06); font-family: Arial, sans-serif;">
54
- <div style="font-size: 18px; font-weight: bold; color: #333;"> {i}. {recipe['name']}</div>
55
 
56
- <div style="margin: 4px 0 8px 0; font-size: 14px; color: #555;">
57
  <b>{recipe['minutes']} min</b> &nbsp;&nbsp;|&nbsp;&nbsp; <b>{recipe['n_steps']} steps</b> &nbsp;&nbsp;|&nbsp;&nbsp; <b>{recipe['avg_rating']:.1f}/5.0</b>
58
- <span style="font-size: 12px; color: #999;">({recipe['num_ratings']} ratings)</span>
59
  </div>
60
 
61
- <div style="margin-bottom: 6px; font-size: 14px;">
62
- <b>Match Score:</b> <span style="color: #007acc; font-weight: bold;">{recipe['similarity_score']:.1%}</span>
63
- <span style="font-size: 12px; color: #888;">(query match)</span><br>
64
  </div>
65
 
66
- <div style="margin-bottom: 6px;">
67
  <b>Tags:</b><br>
68
- {" ".join([f"<span style='background:#eee;padding:4px 8px;border-radius:6px;margin:2px;display:inline-block;font-size:12px'>{tag}</span>" for tag in recipe['tags']])}
 
 
69
  </div>
70
 
71
- <div style="margin-bottom: 6px;">
72
  <b>Ingredients:</b><br>
73
- <span style="font-size: 13px; color: #444;">{', '.join(recipe['ingredients'][:8])}
74
  {'...' if len(recipe['ingredients']) > 8 else ''}</span>
75
  </div>
76
 
77
- {"<div style='margin-top: 10px; font-size: 13px; color: #333;'><b>Description:</b><br>" + description + "</div>" if description else ""}
78
 
79
  {"<div style='margin-top: 10px; font-size: 13px;'><b>Steps:</b><ol style='margin: 6px 0 0 18px; padding: 0;'>" + steps_html + "</ol></div>" if steps_html else ""}
80
  </div>
81
  """
82
- components.html(html_code, height=360 + len(recipe.get("steps", [])) * 20)
83
 
84
  else:
85
  st.warning(f"No recipes found for '{query}' with a minimum rating of {min_rating}/5.0.")
 
1
  from utils.layout import render_layout
2
  import streamlit as st
3
  import time
4
+ from model.search_script import search_for_recipes
5
  import streamlit.components.v1 as components
6
 
7
  def recipe_search_page():
 
9
  ## Advanced Recipe Recommendation
10
  <div class="about-box">
11
  This module uses a custom-trained BERT model to semantically search recipes
12
+ based on your query of ingredients and tags.
13
  </div>
14
  """, unsafe_allow_html=True)
15
 
 
25
 
26
  query = st.text_input(
27
  "Search for recipes:",
28
+ placeholder="e.g., 'chicken pasta italian', 'vegetarian salad', 'chocolate dessert', 'quick easy' "
29
  )
30
 
31
  col1, col2 = st.columns(2)
 
50
  description = recipe.get("description", "").strip().capitalize()
51
 
52
  html_code = f"""
53
+ <div style=\"margin: 8px 0 8px 0; padding: 8px; border-radius: 12px; background-color: #fdfdfd; box-shadow: 0 2px 8px rgba(0,0,0,0.06); font-family: Arial, sans-serif; border: 1px solid #e0e0e0;\">
54
+ <div style=\"font-size: 18px; font-weight: bold; color: #333; margin-bottom: 8px;\"> {i}. {recipe['name']}</div>
55
 
56
+ <div style=\"margin: 4px 0 12px 0; font-size: 14px; color: #555;\">
57
  <b>{recipe['minutes']} min</b> &nbsp;&nbsp;|&nbsp;&nbsp; <b>{recipe['n_steps']} steps</b> &nbsp;&nbsp;|&nbsp;&nbsp; <b>{recipe['avg_rating']:.1f}/5.0</b>
58
+ <span style=\"font-size: 12px; color: #999;\">({recipe['num_ratings']} ratings)</span>
59
  </div>
60
 
61
+ <div style=\"margin-bottom: 8px; font-size: 14px;\">
62
+ <b>Match Score:</b> <span style=\"color: #007acc; font-weight: bold;\">{recipe['similarity_score']:.1%}</span>
63
+ <span style=\"font-size: 12px; color: #888;\">(query match)</span>
64
  </div>
65
 
66
+ <div style=\"margin-bottom: 8px;\">
67
  <b>Tags:</b><br>
68
+ <div style=\"margin-top: 8px;\">
69
+ {" ".join([f"<span style='background:#eee;padding:4px 8px;border-radius:6px;margin:2px;display:inline-block;font-size:12px'>{tag}</span>" for tag in recipe['tags']])}
70
+ </div>
71
  </div>
72
 
73
+ <div style=\"margin-bottom: 8px;\">
74
  <b>Ingredients:</b><br>
75
+ <span style=\"font-size: 13px; color: #444; margin-top: 4px; display: block;\">{', '.join(recipe['ingredients'][:8])}
76
  {'...' if len(recipe['ingredients']) > 8 else ''}</span>
77
  </div>
78
 
79
+ {"<div style='margin-top: 10px; font-size: 13px; color: #333;'><b>Description:</b><br><span style='margin-top: 4px; display: block;'>" + description + "</span></div>" if description else ""}
80
 
81
  {"<div style='margin-top: 10px; font-size: 13px;'><b>Steps:</b><ol style='margin: 6px 0 0 18px; padding: 0;'>" + steps_html + "</ol></div>" if steps_html else ""}
82
  </div>
83
  """
84
+ components.html(html_code, height=340, scrolling=True)
85
 
86
  else:
87
  st.warning(f"No recipes found for '{query}' with a minimum rating of {min_rating}/5.0.")
pages/4_Report.py CHANGED
@@ -1,90 +1,124 @@
1
  import streamlit as st
 
2
 
3
  def render_report():
4
- st.title("Group 5: Term Project Report")
5
 
6
  # Title Page Information
7
  st.markdown("""
8
- **Course:** CSE 555 — Introduction to Pattern Recognition
9
  **Authors:** Saksham Lakhera and Ahmed Zaher
10
  **Date:** July 2025
11
  """)
12
 
13
  # Abstract
14
- st.header("Abstract")
15
 
16
- st.subheader("NLP Engineering Perspective")
17
  st.markdown("""
18
- This project addresses the challenge of improving recipe recommendation systems through
19
- advanced semantic search capabilities using transformer-based language models. Traditional
20
- keyword-based search methods often fail to capture the nuanced relationships between
21
- ingredients, cooking techniques, and user preferences in culinary contexts.
22
 
 
 
 
 
23
  Our approach leverages BERT (Bidirectional Encoder Representations from Transformers)
24
  fine-tuning on a custom recipe dataset to develop a semantic understanding of culinary content.
25
  We preprocessed and structured a subset of 15,000 recipes into standardized sequences organized
26
  by food categories (proteins, vegetables, legumes, etc.) to create training data optimized for
27
  the BERT architecture.
28
-
29
  The model was fine-tuned to learn contextual embeddings that capture semantic relationships
30
- between ingredients and tags. At inference time we generate embeddings for all recipes in our
31
  dataset and perform cosine-similarity retrieval to produce the top-K most relevant recipes
32
  for a user query.
33
  """)
34
 
35
  # Introduction
36
- st.header("Introduction")
37
  st.markdown("""
38
- This term project serves primarily as an educational exercise aimed at giving students
39
  end-to-end exposure to building a modern NLP system. Our goal is to construct a semantic
40
  recipe-search engine that demonstrates how domain-specific fine-tuning of BERT can
41
  substantially improve retrieval quality over simple keyword matching.
42
 
43
  **Key Contributions:**
44
  - A cleaned, category-labelled recipe subset of 15,000 recipes
45
- - Training scripts that yield domain-adapted contextual embeddings
46
  - A production-ready retrieval service that returns top-K most relevant recipes
47
  - Comparative evaluation against classical baselines
48
  """)
49
 
50
  # Dataset and Preprocessing
51
- st.header("Dataset and Pre-processing")
52
 
53
- st.subheader("Data Sources")
54
  st.markdown("""
 
 
55
  The project draws from two CSV files:
56
- - **Raw_recipes.csv** – 231,637 rows, one per recipe with columns: *id, name, ingredients, tags, minutes, steps, description, n_steps, n_ingredients*
57
- - **Raw_interactions.csv** – user feedback containing *recipe_id, user_id, rating (1-5), review text*
58
  """)
59
 
60
- st.subheader("Corpus Filtering and Subset Selection")
61
  st.markdown("""
62
- 1. **Invalid rows removed** – recipes with empty ingredient lists, missing tags, or fewer than three total tags
63
- 2. **Random sampling** – 15,000 recipes selected for NLP fine-tuning
64
- 3. **Positive/negative pairs** – generated for contrastive learning using ratings and tag similarity
65
- 4. **Train/test split** – 80/20 stratified split (12,000/3,000 pairs)
 
 
66
  """)
67
 
68
- st.subheader("Text Pre-processing Pipeline")
69
  st.markdown("""
70
- - **Lower-casing & punctuation removal** – normalized to lowercase, special characters stripped
71
- - **Stop-descriptor removal** – culinary modifiers (*fresh, chopped, minced*) and measurements removed
72
- - **Ingredient ordering** – re-ordered into sequence: **protein → vegetables → grains → dairy → other**
73
- - **Tag normalization** – mapped to six canonical slots: *cuisine, course, main-ingredient, dietary, difficulty, occasion*
74
- - **Tokenization** – standard *bert-base-uncased* WordPiece tokenizer, sequences truncated/padded to 128 tokens
 
 
75
  """)
76
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # Methodology
78
- st.header("Methodology")
79
 
80
- st.subheader("Model Architecture")
81
  st.markdown("""
82
- - **Base Model:** `bert-base-uncased` checkpoint
83
- - **Additional Layers:** Single linear classification layer (768 → 1) with dropout (p = 0.1)
84
- - **Training Objective:** Triplet-margin loss with margin of 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  """)
86
 
87
- st.subheader("Hyperparameters")
88
  col1, col2 = st.columns(2)
89
  with col1:
90
  st.markdown("""
@@ -98,108 +132,94 @@ def render_report():
98
  - **Optimizer:** AdamW
99
  - **Epochs:** 3
100
  - **Hardware:** Google Colab A100 GPU (40 GB VRAM)
101
- - **Training time:** ~75 minutes per run
102
  """)
103
 
104
  # Mathematical Formulations
105
- st.header("Mathematical Formulations")
106
 
107
- st.subheader("Query Embedding and Similarity Calculation")
 
 
 
108
  st.latex(r"""
109
  \text{Similarity}(q, r_i) = \cos(\hat{q}, \hat{r}_i) = \frac{\hat{q} \cdot \hat{r}_i}{\|\hat{q}\|\|\hat{r}_i\|}
110
  """)
111
  st.markdown("Where $\\hat{q}$ is the BERT embedding of the query, and $\\hat{r}_i$ is the embedding of the i-th recipe.")
112
 
113
- st.subheader("Final Score Calculation")
114
- st.latex(r"""
115
- \text{Score}_i = 0.6 \times \text{Similarity}_i + 0.4 \times \text{Popularity}_i
116
- """)
117
 
118
  # Results
119
- st.header("Results")
120
 
121
- st.subheader("Training and Validation Loss")
122
  results_data = {
123
  "Run": [1, 2, 3, 4],
124
  "Configuration": [
125
  "Raw, no cleaning/ordering",
126
  "Cleaned text, unordered",
127
- "Cleaned text + dropout",
128
- "Cleaned text + dropout + ordering"
129
  ],
130
  "Epoch-3 Train Loss": [0.0065, 0.0023, 0.0061, 0.0119],
131
  "Validation Loss": [0.1100, 0.0000, 0.0118, 0.0067]
132
  }
133
  st.table(results_data)
134
-
135
  st.markdown("""
136
- **Key Finding:** Run 4 (cleaned text + dropout + ordering) achieved the best balance
137
  between low validation loss and meaningful retrieval quality.
138
  """)
139
 
140
- st.subheader("Qualitative Retrieval Examples")
141
  st.markdown("""
 
142
  **Query: "beef steak dinner"**
143
  - Run 1 (Raw): *to die for crock pot roast*, *crock pot chicken with black beans*
 
 
144
  - Run 4 (Final): *grilled garlic steak dinner*, *classic beef steak au poivre*
145
 
146
  **Query: "chicken italian pasta"**
147
  - Run 1 (Raw): *to die for crock pot roast*, *crock pot chicken with black beans*
 
 
148
  - Run 4 (Final): *creamy tuscan chicken pasta*, *italian chicken penne bake*
149
 
150
  **Query: "vegetarian salad healthy"**
151
- - Run 1 (Raw): (irrelevant hits)
 
 
152
  - Run 4 (Final): *kale quinoa power salad*, *superfood spinach & berry salad*
153
  """)
154
 
155
  # Discussion and Conclusion
156
- st.header("Discussion and Conclusion")
157
  st.markdown("""
158
  The experimental evidence underscores the importance of disciplined pre-processing when
159
- adapting large language models to niche domains. The breakthrough came with **ingredient-ordering**
160
- (protein → vegetables → grains → dairy → other) which supplied consistent positional signals.
 
 
161
 
162
  **Key Achievements:**
163
  - End-to-end recipe recommendation system with semantic search
164
- - Sub-second latency across 231k recipes
165
  - Meaningful semantic understanding of culinary content
166
  - Reproducible blueprint for domain-specific NLP applications
167
 
168
  **Limitations:**
169
- - Private dataset relatively small (15k samples) compared to public corpora
 
170
  - Minimal hyperparameter search conducted
171
  - Single-machine deployment tested
 
172
  """)
173
 
174
- # Technical Specifications
175
- st.header("Technical Specifications")
176
- col1, col2 = st.columns(2)
177
- with col1:
178
- st.markdown("""
179
- **Dataset:**
180
- - Total Recipes: 231,630
181
- - Training Set: 15,000 recipes
182
- - Average Tags per Recipe: ~6
183
- - Ingredients per Recipe: 3-20
184
- """)
185
- with col2:
186
- st.markdown("""
187
- **Infrastructure:**
188
- - Python 3.10
189
- - PyTorch 2.1 (CUDA 11.8)
190
- - Transformers 4.38
191
- - Google Colab A100 GPU
192
- """)
193
-
194
  # References
195
- st.header("References")
196
  st.markdown("""
197
- [1] Vaswani et al., "Attention Is All You Need," NeurIPS, 2017.
198
-
199
- [2] Devlin et al., "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding," NAACL-HLT, 2019.
200
-
201
- [3] Reimers and Gurevych, "Sentence-BERT: Sentence Embeddings Using Siamese BERT-Networks," EMNLP-IJCNLP, 2019.
202
-
203
  [4] Hugging Face, "BERT Model Documentation," 2024.
204
  """)
205
 
@@ -207,5 +227,5 @@ def render_report():
207
  st.markdown("© 2025 CSE 555 Term Project. All rights reserved.")
208
 
209
  # Render the report
210
- render_report()
211
 
 
1
  import streamlit as st
2
+ from utils.layout import render_layout
3
 
4
  def render_report():
5
+ st.title("Image Classification CV and Fine-Tuned NLP Recipe Recommendation")
6
 
7
  # Title Page Information
8
  st.markdown("""
 
9
  **Authors:** Saksham Lakhera and Ahmed Zaher
10
  **Date:** July 2025
11
  """)
12
 
13
  # Abstract
14
+ st.subheader("Abstract")
15
 
 
16
  st.markdown("""
17
+ **NLP Engineering Perspective:**
 
 
 
18
 
19
+ This project addresses the challenge of improving recipe recommendation systems through
20
+ advanced semantic search capabilities using transformer-based language models. This report explains how to fine-tune a model
21
+ to learn domain-specific context to capture the nuanced relationships between
22
+ ingredients and cooking techniques in culinary contexts.
23
  Our approach leverages BERT (Bidirectional Encoder Representations from Transformers)
24
  fine-tuning on a custom recipe dataset to develop a semantic understanding of culinary content.
25
  We preprocessed and structured a subset of 15,000 recipes into standardized sequences organized
26
  by food categories (proteins, vegetables, legumes, etc.) to create training data optimized for
27
  the BERT architecture.
 
28
  The model was fine-tuned to learn contextual embeddings that capture semantic relationships
29
+ between ingredients and tags. At the end, we generate embeddings for all recipes in our
30
  dataset and perform cosine-similarity retrieval to produce the top-K most relevant recipes
31
  for a user query.
32
  """)
33
 
34
  # Introduction
35
+ st.subheader("Introduction")
36
  st.markdown("""
37
+ This term project serves primarily as an educational exercise aimed at giving
38
  end-to-end exposure to building a modern NLP system. Our goal is to construct a semantic
39
  recipe-search engine that demonstrates how domain-specific fine-tuning of BERT can
40
  substantially improve retrieval quality over simple keyword matching.
41
 
42
  **Key Contributions:**
43
  - A cleaned, category-labelled recipe subset of 15,000 recipes
44
+ - Training scripts that yield adapted contextual embeddings
45
  - A production-ready retrieval service that returns top-K most relevant recipes
46
  - Comparative evaluation against classical baselines
47
  """)
48
 
49
  # Dataset and Preprocessing
50
+ st.subheader("Dataset and Pre-processing")
51
 
 
52
  st.markdown("""
53
+ **Data Sources:**
54
+
55
  The project draws from two CSV files:
56
+ - **Raw_recipes.csv:** 231,637 rows, one per recipe with columns: *id, name, ingredients, tags, minutes, steps, description, n_steps, n_ingredients*
57
+ - **Raw_interactions.csv:** user feedback containing *recipe_id, user_id, rating, review text*
58
  """)
59
 
 
60
  st.markdown("""
61
+ **Corpus Filtering and Subset Selection**
62
+
63
+ - **Invalid rows removed:** recipes with empty ingredient lists, missing tags, or fewer than three total tags
64
+ - **Random sampling:** 15,000 recipes selected for NLP fine-tuning
65
+ - **Positive/negative pairs:** generated for contrastive learning using ratings and tag similarity
66
+ - **Train/test split:** 80/20 stratified split (12,000/3,000 pairs)
67
  """)
68
 
 
69
  st.markdown("""
70
+ **Text Pre-processing Pipeline**
71
+
72
+ - **Lower-casing & punctuation removal:** normalized to lowercase, special characters stripped
73
+ - **Stop-descriptor removal:** culinary modifiers (*fresh, chopped, minced*) and measurements (tablespoons, teaspoons, cups, etc.) removed
74
+ - **Ingredient ordering:** re-ordered into sequence: protein → vegetables/grains/dairy → other
75
+ - **Tag normalization:** mapped to 7 main categories: *cuisine, course, main-ingredient, dietary, difficulty, occasion, cooking_method*
76
+ - **Tokenization:** standard *bert-base-uncased* WordPiece tokenizer, sequences truncated/padded to 128 tokens
77
  """)
78
+ # Technical Specifications
79
+ st.subheader("Technical Specifications")
80
+ col1, col2 = st.columns(2)
81
+ with col1:
82
+ st.markdown("""
83
+ **Dataset:**
84
+ - Total Recipes: 231,630
85
+ - Training Set: 12,000 recipes
86
+ - Average Tags per Recipe: ~6
87
+ - Ingredients per Recipe: 3-20
88
+ """)
89
+ with col2:
90
+ st.markdown("""
91
+ **Infrastructure:**
92
+ - Python 3.10
93
+ - PyTorch 2.1 (CUDA 11.8)
94
+ - Transformers 4.38
95
+ - Google Colab A100 GPU
96
+ """)
97
  # Methodology
98
+ st.subheader("Methodology")
99
 
 
100
  st.markdown("""
101
+ **Model Architecture**
102
+
103
+ - **Base Model:** bert-base-uncased
104
+ - **Additional Layers:** In some runs, we added a single linear classification layer with dropout (p = 0.1)
105
+ - **Training Objective:** Triplet-margin loss with margin of 1.0
106
+
107
+ We first trained the model directly on the raw data to see whether it would give good results. As seen in Table 1, this run resulted in a very low training error
108
+ but when run on the validation set, the error was higher. We then cleaned up the data by removing any empty space, standardizing to lowercase text, and removing
109
+ all punctuation and retrained the model. This resulted in a highly overfitted model as seen in table 1 and the results section below. Next, we added a single linear layer on top of
110
+ BERT's existing architecture and added dropout to reduce overfitting. The results, as shown in Table 1, were better. Although the semantic
111
+ results were better than before, the model was still not good at identifying the relationships between ingredients and the different tags. We then further
112
+ structured the data by ordering the tags and ingredients in a consistent manner across the dataset and retrained the model. This resulted in a better
113
+ training and validation loss. This is also evident in the semantic retrieval results below.
114
+
115
+ **Website Development:**
116
+ - We used Streamlit to develop the website. However, we faced a few issues with the size of the trained model, so we switched hosting to Hugging Face.
117
+ - The website loads the pre-trained model along with the recipe embeddings and the top-k retrieval function, then waits for the user to enter a query.
118
+ - The query is then processed by the model and the top-k recipes are returned.
119
  """)
120
 
121
+ st.markdown("**Hyperparameters and Training**")
122
  col1, col2 = st.columns(2)
123
  with col1:
124
  st.markdown("""
 
132
  - **Optimizer:** AdamW
133
  - **Epochs:** 3
134
  - **Hardware:** Google Colab A100 GPU (40 GB VRAM)
135
+ - **Training time:** ~30 minutes per run
136
  """)
137
 
138
  # Mathematical Formulations
139
+ st.subheader("Mathematical Formulations and Top-K Retrieval")
140
 
141
+ st.markdown("""**Query Embedding and Similarity Calculation**: we used the trained model weights to generate embeddings for the entire recipe corpus. We then used cosine similarity to calculate the similarity between the query and the recipe corpus.
142
+ Once a user query was submitted, we embedded the query using the trained model and used the cosine similarity formula below to retrieve the top-K
143
+ recipes. We then kept only the recipes that have an average rating >= 3.0 and at least 5 ratings, and sorted them by similarity and then by average rating.
144
+ """)
145
  st.latex(r"""
146
  \text{Similarity}(q, r_i) = \cos(\hat{q}, \hat{r}_i) = \frac{\hat{q} \cdot \hat{r}_i}{\|\hat{q}\|\|\hat{r}_i\|}
147
  """)
148
  st.markdown("Where $\\hat{q}$ is the BERT embedding of the query, and $\\hat{r}_i$ is the embedding of the i-th recipe.")
149
 
 
 
 
 
150
 
151
  # Results
152
+ st.subheader("Results")
153
 
154
+ st.markdown("**Training and Validation Loss**")
155
  results_data = {
156
  "Run": [1, 2, 3, 4],
157
  "Configuration": [
158
  "Raw, no cleaning/ordering",
159
  "Cleaned text, unordered",
160
+ "Cleaned text + single layer + dropout",
161
+ "Cleaned text + ordering"
162
  ],
163
  "Epoch-3 Train Loss": [0.0065, 0.0023, 0.0061, 0.0119],
164
  "Validation Loss": [0.1100, 0.0000, 0.0118, 0.0067]
165
  }
166
  st.table(results_data)
167
+ st.markdown("""Table 1: Training and Validation Loss for each run""")
168
  st.markdown("""
169
+ **Key Finding:** Run 4 (cleaned text + ordering) achieved the best balance
170
  between low validation loss and meaningful retrieval quality.
171
  """)
172
 
173
+ st.markdown("**Qualitative Retrieval Examples**")
174
  st.markdown("""
175
+ In this section, we will show how the results of the model differ between runs and how the model performs on different queries.
176
  **Query: "beef steak dinner"**
177
  - Run 1 (Raw): *to die for crock pot roast*, *crock pot chicken with black beans*
178
+ - Run 2 (Cleaned text, unordered): *aussie pepper steak steak with creamy pepper sauce*
179
+ - Run 3 (Cleaned text + single layer + dropout): *balsamic rib eye steak with bleu cheese sauce*
180
  - Run 4 (Final): *grilled garlic steak dinner*, *classic beef steak au poivre*
181
 
182
  **Query: "chicken italian pasta"**
183
  - Run 1 (Raw): *to die for crock pot roast*, *crock pot chicken with black beans*
184
+ - Run 2 (Cleaned text, unordered): *baked chicken soup*
185
+ - Run 3 (Cleaned text + single layer + dropout): *absolute best ever lasagna*
186
  - Run 4 (Final): *creamy tuscan chicken pasta*, *italian chicken penne bake*
187
 
188
  **Query: "vegetarian salad healthy"**
189
+ - Run 1 (Raw): *to die for crock pot roast*
190
+ - Run 2 (Cleaned text, unordered): *avocado mandarin salad*
191
+ - Run 3 (Cleaned text + single layer + dropout): *black bean and sweet potato salad*
192
  - Run 4 (Final): *kale quinoa power salad*, *superfood spinach & berry salad*
193
  """)
194
 
195
  # Discussion and Conclusion
196
+ st.subheader("Discussion and Conclusion")
197
  st.markdown("""
198
  The experimental evidence underscores the importance of disciplined pre-processing when
199
+ adapting large language models to niche domains. The breakthrough came with ingredient-ordering
200
+ (protein → vegetables → grains → dairy → other) which supplied consistent positional signals. As we can see in the results,
201
+ the performance of the model improves with the addition of the single layer and dropout but the results are still not as good as the final run where
202
+ we added the ordering of the ingredients.
203
 
204
  **Key Achievements:**
205
  - End-to-end recipe recommendation system with semantic search
 
206
  - Meaningful semantic understanding of culinary content
207
  - Reproducible blueprint for domain-specific NLP applications
208
 
209
  **Limitations:**
210
+ - Relatively small private training set (12k samples) compared to public corpora
211
+ - Further pre-processing could be done to improve the results
212
  - Minimal hyperparameter search conducted
213
  - Single-machine deployment tested
214
+ - The model is not able to handle complex queries and it is not able to handle synonyms and antonyms.
215
  """)
216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  # References
218
+ st.subheader("References")
219
  st.markdown("""
220
+ [1] Vaswani et al., "Attention Is All You Need," NeurIPS, 2017.
221
+ [2] Devlin et al., "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding," NAACL-HLT, 2019.
222
+ [3] Reimers and Gurevych, "Sentence-BERT: Sentence Embeddings Using Siamese BERT-Networks," EMNLP-IJCNLP, 2019.
 
 
 
223
  [4] Hugging Face, "BERT Model Documentation," 2024.
224
  """)
225
 
 
227
  st.markdown("© 2025 CSE 555 Term Project. All rights reserved.")
228
 
229
  # Render the report
230
+ render_layout(render_report)
231
 
utils/layout.py CHANGED
@@ -3,7 +3,7 @@ import streamlit as st
3
 
4
  def set_custom_page_config():
5
  st.set_page_config(
6
- page_title="Smart Kitchen Assistant",
7
  layout="wide",
8
  initial_sidebar_state="expanded"
9
  )
@@ -11,15 +11,15 @@ def set_custom_page_config():
11
  def render_header():
12
  st.markdown("""
13
  <div class="project-header">
14
- <h1>Smart Kitchen Assistant</h1>
15
- <p>CSE555 Final Project — Group 5: Saksham & Ahmed</p>
16
  </div>
17
  """, unsafe_allow_html=True)
18
 
19
  def render_footer():
20
  st.markdown("""
21
  <div class="footer">
22
- <p>Made with ❤️ by Saksham & Ahmed | CSE555 @ UB</p>
23
  </div>
24
  """, unsafe_allow_html=True)
25
 
 
3
 
4
  def set_custom_page_config():
5
  st.set_page_config(
6
+ page_title="Computer Vision and Natural Language Processing Project",
7
  layout="wide",
8
  initial_sidebar_state="expanded"
9
  )
 
11
  def render_header():
12
  st.markdown("""
13
  <div class="project-header">
14
+ <h1>Computer Vision and Natural Language Processing Project</h1>
15
+ <p>CSE555 Final Project — Group 5: Saksham Lakhera & Ahmed Zaher</p>
16
  </div>
17
  """, unsafe_allow_html=True)
18
 
19
  def render_footer():
20
  st.markdown("""
21
  <div class="footer">
22
+ <p>Made with ❤️ by Saksham & Zaher | CSE555 @ UB</p>
23
  </div>
24
  """, unsafe_allow_html=True)
25