Spaces:

PatternGroup5
/

pattern

Sleeping

App Files Files Community

azaher1215 commited on Jul 24

Commit

05b9293

1 Parent(s): a306fec

final report additions

Browse files

Files changed (5) hide show

Home.py +8 -10
model/search_script.py +1 -1
pages/3_Recipe_Recommendation.py +18 -16
pages/4_Report.py +103 -83
utils/layout.py +4 -4

Home.py CHANGED Viewed

@@ -9,16 +9,14 @@ render_header()
 st.markdown("""
 <div class="about-box">
-    Welcome to our Smart Kitchen Assistant — a CSE555 Final Project developed by Group 5 (Saksham & Ahmed).
-    <br><br>
-    🔍 This tool leverages AI to assist in:
-    - Classifying images of vegetables and fruits.
-    - Detecting their variations (cut, whole, sliced).
-    - Recommending recipes based on natural language input.
 </div>
-### 🔗 Use the left sidebar to navigate between:
-- 🥦 Task A: Classification
-- 🧊 Task B: Variation Detection
-- 🧠 NLP Recipe Recommendation
 """, unsafe_allow_html=True)

 st.markdown("""
 <div class="about-box">
+     This tool leverages AI to assist in:<br>
+        - Classifying images of vegetables and fruits.<br>
+        - Detecting their variations (cut, whole, sliced).<br>
+        - Recommending recipes based on natural language input.<br>
 </div>
+### Use the left sidebar to navigate between:
+-  Task A: Classification - upload an image of a vegetable or fruit to classify it.
+-  Task B: Variation Detection - upload an image of a vegetable or fruit to detect its variation.
+-  NLP Recipe Recommendation - enter a search query to recommend a recipe.
 """, unsafe_allow_html=True)

model/search_script.py CHANGED Viewed

@@ -228,7 +228,7 @@ if __name__ == "__main__":
         # "chocolate cake dessert brownie baked healthy",
         # "healthy vegetarian salad tomato basil",
         # "quick easy dinner",
-        # "beef steak",
         "beef pasta",
         "beef"
     ]

         # "chocolate cake dessert brownie baked healthy",
         # "healthy vegetarian salad tomato basil",
         # "quick easy dinner",
+        "beef steak",
         "beef pasta",
         "beef"
     ]

pages/3_Recipe_Recommendation.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from utils.layout import render_layout
 import streamlit as st
 import time
-from model.search_script import search_for_recipes  # assumed you modularized this logic
 import streamlit.components.v1 as components
 def recipe_search_page():
@@ -9,7 +9,7 @@ def recipe_search_page():
         ## Advanced Recipe Recommendation
         <div class="about-box">
         This module uses a custom-trained BERT model to semantically search recipes
-        based on your query, ingredients, and tags.
         </div>
     """, unsafe_allow_html=True)
@@ -25,7 +25,7 @@ def recipe_search_page():
     query = st.text_input(
         "Search for recipes:",
-        placeholder="e.g., 'chicken pasta', 'vegetarian salad', 'chocolate dessert', 'quick easy "
     )
     col1, col2 = st.columns(2)
@@ -50,36 +50,38 @@ def recipe_search_page():
                 description = recipe.get("description", "").strip().capitalize()
                 html_code = f"""
-                <div style="margin-bottom: 24px; padding: 16px; border-radius: 12px; background-color: #fdfdfd; box-shadow: 0 2px 8px rgba(0,0,0,0.06); font-family: Arial, sans-serif;">
-                    <div style="font-size: 18px; font-weight: bold; color: #333;"> {i}. {recipe['name']}</div>
-                    <div style="margin: 4px 0 8px 0; font-size: 14px; color: #555;">
                          <b>{recipe['minutes']} min</b> &nbsp;&nbsp;|&nbsp;&nbsp; <b>{recipe['n_steps']} steps</b> &nbsp;&nbsp;|&nbsp;&nbsp; <b>{recipe['avg_rating']:.1f}/5.0</b>
-                        <span style="font-size: 12px; color: #999;">({recipe['num_ratings']} ratings)</span>
                     </div>
-                    <div style="margin-bottom: 6px; font-size: 14px;">
-                        <b>Match Score:</b> <span style="color: #007acc; font-weight: bold;">{recipe['similarity_score']:.1%}</span>
-                        <span style="font-size: 12px; color: #888;">(query match)</span><br>
                     </div>
-                    <div style="margin-bottom: 6px;">
                         <b>Tags:</b><br>
-                        {" ".join([f"<span style='background:#eee;padding:4px 8px;border-radius:6px;margin:2px;display:inline-block;font-size:12px'>{tag}</span>" for tag in recipe['tags']])}
                     </div>
-                    <div style="margin-bottom: 6px;">
                         <b>Ingredients:</b><br>
-                        <span style="font-size: 13px; color: #444;">{', '.join(recipe['ingredients'][:8])}
                         {'...' if len(recipe['ingredients']) > 8 else ''}</span>
                     </div>
-                    {"<div style='margin-top: 10px; font-size: 13px; color: #333;'><b>Description:</b><br>" + description + "</div>" if description else ""}
                     {"<div style='margin-top: 10px; font-size: 13px;'><b>Steps:</b><ol style='margin: 6px 0 0 18px; padding: 0;'>" + steps_html + "</ol></div>" if steps_html else ""}
                 </div>
                 """
-                components.html(html_code, height=360 + len(recipe.get("steps", [])) * 20)
         else:
             st.warning(f"No recipes found for '{query}' with a minimum rating of {min_rating}/5.0.")

 from utils.layout import render_layout
 import streamlit as st
 import time
+from model.search_script import search_for_recipes
 import streamlit.components.v1 as components
 def recipe_search_page():
         ## Advanced Recipe Recommendation
         <div class="about-box">
         This module uses a custom-trained BERT model to semantically search recipes
+        based on your query of ingredients and tags.
         </div>
     """, unsafe_allow_html=True)
     query = st.text_input(
         "Search for recipes:",
+        placeholder="e.g., 'chicken pasta italian', 'vegetarian salad', 'chocolate dessert', 'quick easy' "
     )
     col1, col2 = st.columns(2)
                 description = recipe.get("description", "").strip().capitalize()
                 html_code = f"""
+                <div style=\"margin: 8px 0 8px 0; padding: 8px; border-radius: 12px; background-color: #fdfdfd; box-shadow: 0 2px 8px rgba(0,0,0,0.06); font-family: Arial, sans-serif; border: 1px solid #e0e0e0;\">
+                    <div style=\"font-size: 18px; font-weight: bold; color: #333; margin-bottom: 8px;\"> {i}. {recipe['name']}</div>
+                    <div style=\"margin: 4px 0 12px 0; font-size: 14px; color: #555;\">
                          <b>{recipe['minutes']} min</b> &nbsp;&nbsp;|&nbsp;&nbsp; <b>{recipe['n_steps']} steps</b> &nbsp;&nbsp;|&nbsp;&nbsp; <b>{recipe['avg_rating']:.1f}/5.0</b>
+                        <span style=\"font-size: 12px; color: #999;\">({recipe['num_ratings']} ratings)</span>
                     </div>
+                    <div style=\"margin-bottom: 8px; font-size: 14px;\">
+                        <b>Match Score:</b> <span style=\"color: #007acc; font-weight: bold;\">{recipe['similarity_score']:.1%}</span>
+                        <span style=\"font-size: 12px; color: #888;\">(query match)</span>
                     </div>
+                    <div style=\"margin-bottom: 8px;\">
                         <b>Tags:</b><br>
+                        <div style=\"margin-top: 8px;\">
+                            {" ".join([f"<span style='background:#eee;padding:4px 8px;border-radius:6px;margin:2px;display:inline-block;font-size:12px'>{tag}</span>" for tag in recipe['tags']])}
+                        </div>
                     </div>
+                    <div style=\"margin-bottom: 8px;\">
                         <b>Ingredients:</b><br>
+                        <span style=\"font-size: 13px; color: #444; margin-top: 4px; display: block;\">{', '.join(recipe['ingredients'][:8])}
                         {'...' if len(recipe['ingredients']) > 8 else ''}</span>
                     </div>
+                    {"<div style='margin-top: 10px; font-size: 13px; color: #333;'><b>Description:</b><br><span style='margin-top: 4px; display: block;'>" + description + "</span></div>" if description else ""}
                     {"<div style='margin-top: 10px; font-size: 13px;'><b>Steps:</b><ol style='margin: 6px 0 0 18px; padding: 0;'>" + steps_html + "</ol></div>" if steps_html else ""}
                 </div>
                 """
+                components.html(html_code, height=340, scrolling=True)
         else:
             st.warning(f"No recipes found for '{query}' with a minimum rating of {min_rating}/5.0.")

pages/4_Report.py CHANGED Viewed

@@ -1,90 +1,124 @@
 import streamlit as st
 def render_report():
-    st.title("Group 5: Term Project Report")
     # Title Page Information
     st.markdown("""
-    **Course:** CSE 555 — Introduction to Pattern Recognition
     **Authors:** Saksham Lakhera and Ahmed Zaher
     **Date:** July 2025
     """)
     # Abstract
-    st.header("Abstract")
-    st.subheader("NLP Engineering Perspective")
     st.markdown("""
-    This project addresses the challenge of improving recipe recommendation systems through
-    advanced semantic search capabilities using transformer-based language models. Traditional
-    keyword-based search methods often fail to capture the nuanced relationships between
-    ingredients, cooking techniques, and user preferences in culinary contexts.
     Our approach leverages BERT (Bidirectional Encoder Representations from Transformers)
     fine-tuning on a custom recipe dataset to develop a semantic understanding of culinary content.
     We preprocessed and structured a subset of 15,000 recipes into standardized sequences organized
     by food categories (proteins, vegetables, legumes, etc.) to create training data optimized for
     the BERT architecture.
     The model was fine-tuned to learn contextual embeddings that capture semantic relationships
-    between ingredients and tags. At inference time we generate embeddings for all recipes in our
     dataset and perform cosine-similarity retrieval to produce the top-K most relevant recipes
     for a user query.
     """)
     # Introduction
-    st.header("Introduction")
     st.markdown("""
-    This term project serves primarily as an educational exercise aimed at giving students
     end-to-end exposure to building a modern NLP system. Our goal is to construct a semantic
     recipe-search engine that demonstrates how domain-specific fine-tuning of BERT can
     substantially improve retrieval quality over simple keyword matching.
     **Key Contributions:**
     - A cleaned, category-labelled recipe subset of 15,000 recipes
-    - Training scripts that yield domain-adapted contextual embeddings
     - A production-ready retrieval service that returns top-K most relevant recipes
     - Comparative evaluation against classical baselines
     """)
     # Dataset and Preprocessing
-    st.header("Dataset and Pre-processing")
-    st.subheader("Data Sources")
     st.markdown("""
     The project draws from two CSV files:
-    - **Raw_recipes.csv** – 231,637 rows, one per recipe with columns: *id, name, ingredients, tags, minutes, steps, description, n_steps, n_ingredients*
-    - **Raw_interactions.csv** – user feedback containing *recipe_id, user_id, rating (1-5), review text*
     """)
-    st.subheader("Corpus Filtering and Subset Selection")
     st.markdown("""
-    1. **Invalid rows removed** – recipes with empty ingredient lists, missing tags, or fewer than three total tags
-    2. **Random sampling** – 15,000 recipes selected for NLP fine-tuning
-    3. **Positive/negative pairs** – generated for contrastive learning using ratings and tag similarity
-    4. **Train/test split** – 80/20 stratified split (12,000/3,000 pairs)
     """)
-    st.subheader("Text Pre-processing Pipeline")
     st.markdown("""
-    - **Lower-casing & punctuation removal** – normalized to lowercase, special characters stripped
-    - **Stop-descriptor removal** – culinary modifiers (*fresh, chopped, minced*) and measurements removed
-    - **Ingredient ordering** – re-ordered into sequence: **protein → vegetables → grains → dairy → other**
-    - **Tag normalization** – mapped to six canonical slots: *cuisine, course, main-ingredient, dietary, difficulty, occasion*
-    - **Tokenization** – standard *bert-base-uncased* WordPiece tokenizer, sequences truncated/padded to 128 tokens
     """)
     # Methodology
-    st.header("Methodology")
-    st.subheader("Model Architecture")
     st.markdown("""
-    - **Base Model:** `bert-base-uncased` checkpoint
-    - **Additional Layers:** Single linear classification layer (768 → 1) with dropout (p = 0.1)
-    - **Training Objective:** Triplet-margin loss with margin of 1.0
     """)
-    st.subheader("Hyperparameters")
     col1, col2 = st.columns(2)
     with col1:
         st.markdown("""
@@ -98,108 +132,94 @@ def render_report():
         - **Optimizer:** AdamW
         - **Epochs:** 3
         - **Hardware:** Google Colab A100 GPU (40 GB VRAM)
-        - **Training time:** ~75 minutes per run
         """)
     # Mathematical Formulations
-    st.header("Mathematical Formulations")
-    st.subheader("Query Embedding and Similarity Calculation")
     st.latex(r"""
         \text{Similarity}(q, r_i) = \cos(\hat{q}, \hat{r}_i) = \frac{\hat{q} \cdot \hat{r}_i}{\|\hat{q}\|\|\hat{r}_i\|}
     """)
     st.markdown("Where $\\hat{q}$ is the BERT embedding of the query, and $\\hat{r}_i$ is the embedding of the i-th recipe.")
-    st.subheader("Final Score Calculation")
-    st.latex(r"""
-        \text{Score}_i = 0.6 \times \text{Similarity}_i + 0.4 \times \text{Popularity}_i
-    """)
     # Results
-    st.header("Results")
-    st.subheader("Training and Validation Loss")
     results_data = {
         "Run": [1, 2, 3, 4],
         "Configuration": [
             "Raw, no cleaning/ordering",
             "Cleaned text, unordered",
-            "Cleaned text + dropout",
-            "Cleaned text + dropout + ordering"
         ],
         "Epoch-3 Train Loss": [0.0065, 0.0023, 0.0061, 0.0119],
         "Validation Loss": [0.1100, 0.0000, 0.0118, 0.0067]
     }
     st.table(results_data)
     st.markdown("""
-    **Key Finding:** Run 4 (cleaned text + dropout + ordering) achieved the best balance
     between low validation loss and meaningful retrieval quality.
     """)
-    st.subheader("Qualitative Retrieval Examples")
     st.markdown("""
     **Query: "beef steak dinner"**
     - Run 1 (Raw): *to die for crock pot roast*, *crock pot chicken with black beans*
     - Run 4 (Final): *grilled garlic steak dinner*, *classic beef steak au poivre*
     **Query: "chicken italian pasta"**
     - Run 1 (Raw): *to die for crock pot roast*, *crock pot chicken with black beans*
     - Run 4 (Final): *creamy tuscan chicken pasta*, *italian chicken penne bake*
     **Query: "vegetarian salad healthy"**
-    - Run 1 (Raw): (irrelevant hits)
     - Run 4 (Final): *kale quinoa power salad*, *superfood spinach & berry salad*
     """)
     # Discussion and Conclusion
-    st.header("Discussion and Conclusion")
     st.markdown("""
     The experimental evidence underscores the importance of disciplined pre-processing when
-    adapting large language models to niche domains. The breakthrough came with **ingredient-ordering**
-    (protein → vegetables → grains → dairy → other) which supplied consistent positional signals.
     **Key Achievements:**
     - End-to-end recipe recommendation system with semantic search
-    - Sub-second latency across 231k recipes
     - Meaningful semantic understanding of culinary content
     - Reproducible blueprint for domain-specific NLP applications
     **Limitations:**
-    - Private dataset relatively small (15k samples) compared to public corpora
     - Minimal hyperparameter search conducted
     - Single-machine deployment tested
     """)
-    # Technical Specifications
-    st.header("Technical Specifications")
-    col1, col2 = st.columns(2)
-    with col1:
-        st.markdown("""
-        **Dataset:**
-        - Total Recipes: 231,630
-        - Training Set: 15,000 recipes
-        - Average Tags per Recipe: ~6
-        - Ingredients per Recipe: 3-20
-        """)
-    with col2:
-        st.markdown("""
-        **Infrastructure:**
-        - Python 3.10
-        - PyTorch 2.1 (CUDA 11.8)
-        - Transformers 4.38
-        - Google Colab A100 GPU
-        """)
     # References
-    st.header("References")
     st.markdown("""
-    [1] Vaswani et al., "Attention Is All You Need," NeurIPS, 2017.
-    [2] Devlin et al., "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding," NAACL-HLT, 2019.
-    [3] Reimers and Gurevych, "Sentence-BERT: Sentence Embeddings Using Siamese BERT-Networks," EMNLP-IJCNLP, 2019.
     [4] Hugging Face, "BERT Model Documentation," 2024.
     """)
@@ -207,5 +227,5 @@ def render_report():
     st.markdown("© 2025 CSE 555 Term Project. All rights reserved.")
 # Render the report
-render_report()

 import streamlit as st
+from utils.layout import render_layout
 def render_report():
+    st.title("Image Classification CV and Fine-Tuned NLP Recipe Recommendation")
     # Title Page Information
     st.markdown("""
     **Authors:** Saksham Lakhera and Ahmed Zaher
     **Date:** July 2025
     """)
     # Abstract
+    st.subheader("Abstract")
     st.markdown("""
+    **NLP Engineering Perspective:**
+    This project addresses the challenge of improving recipe recommendation systems through
+    advanced semantic search capabilities using transformer-based language models. This report explains how to fine-tune a model
+    to learn domain-specific context so that it captures the nuanced relationships between
+    ingredients and cooking techniques in culinary contexts.
     Our approach leverages BERT (Bidirectional Encoder Representations from Transformers)
     fine-tuning on a custom recipe dataset to develop a semantic understanding of culinary content.
     We preprocessed and structured a subset of 15,000 recipes into standardized sequences organized
     by food categories (proteins, vegetables, legumes, etc.) to create training data optimized for
     the BERT architecture.
     The model was fine-tuned to learn contextual embeddings that capture semantic relationships
+    between ingredients and tags. At the end, we generate embeddings for all recipes in our
     dataset and perform cosine-similarity retrieval to produce the top-K most relevant recipes
     for a user query.
     """)
     # Introduction
+    st.subheader("Introduction")
     st.markdown("""
+    This term project serves primarily as an educational exercise aimed at giving
     end-to-end exposure to building a modern NLP system. Our goal is to construct a semantic
     recipe-search engine that demonstrates how domain-specific fine-tuning of BERT can
     substantially improve retrieval quality over simple keyword matching.
     **Key Contributions:**
     - A cleaned, category-labelled recipe subset of 15,000 recipes
+    - Training scripts that yield adapted contextual embeddings
     - A production-ready retrieval service that returns top-K most relevant recipes
     - Comparative evaluation against classical baselines
     """)
     # Dataset and Preprocessing
+    st.subheader("Dataset and Pre-processing")
     st.markdown("""
+    **Data Sources:**
     The project draws from two CSV files:
+    - **Raw_recipes.csv:** 231,637 rows, one per recipe with columns: *id, name, ingredients, tags, minutes, steps, description, n_steps, n_ingredients*
+    - **Raw_interactions.csv:** user feedback containing *recipe_id, user_id, rating, review text*
     """)
     st.markdown("""
+    **Corpus Filtering and Subset Selection**
+    - **Invalid rows removed:** recipes with empty ingredient lists, missing tags, or fewer than three total tags
+    - **Random sampling:** 15,000 recipes selected for NLP fine-tuning
+    - **Positive/negative pairs:** generated for contrastive learning using ratings and tag similarity
+    - **Train/test split:** 80/20 stratified split (12,000/3,000 pairs)
     """)
     st.markdown("""
+    **Text Pre-processing Pipeline**
+    - **Lower-casing & punctuation removal:** normalized to lowercase, special characters stripped
+    - **Stop-descriptor removal:** culinary modifiers (*fresh, chopped, minced*) and measurements (tablespoons, teaspoons, cups, etc.) removed
+    - **Ingredient ordering:** re-ordered into sequence: protein → vegetables/grains/dairy → other
+    - **Tag normalization:** mapped to 7 main categories: *cuisine, course, main-ingredient, dietary, difficulty, occasion, cooking_method*
+    - **Tokenization:** standard *bert-base-uncased* WordPiece tokenizer, sequences truncated/padded to 128 tokens
     """)
+    # Technical Specifications
+    st.subheader("Technical Specifications")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("""
+        **Dataset:**
+        - Total Recipes: 231,630
+        - Training Set: 12,000 recipes
+        - Average Tags per Recipe: ~6
+        - Ingredients per Recipe: 3-20
+        """)
+    with col2:
+        st.markdown("""
+        **Infrastructure:**
+        - Python 3.10
+        - PyTorch 2.1 (CUDA 11.8)
+        - Transformers 4.38
+        - Google Colab A100 GPU
+        """)
     # Methodology
+    st.subheader("Methodology")
     st.markdown("""
+    **Model Architecture**
+    - **Base Model:** bert-base-uncased
+    - **Additional Layers:** In some runs, we added a single linear classification layer with dropout (p = 0.1)
+    - **Training Objective:** Triplet-margin loss with margin of 1.0
+    We first trained the model directly on the raw data to see whether it would produce good results. As seen in Table 1, this run resulted in a very low training error,
+    but when run on the validation set, the error was higher. We then cleaned up the data by removing empty space, standardizing the text to lowercase, and removing
+    all punctuation, and retrained the model. This resulted in a highly overfitted model, as seen in Table 1 and the results section below. Next, we added a single linear layer on top of
+    BERT's existing architecture and added dropout to reduce overfitting. The results, as shown in Table 1, were better. Although the semantic
+    results were better than before, the model was still not good at identifying the relationships between ingredients and the different tags. We then further
+    structured the data by ordering the tags and ingredients in a structured manner across the dataset and retrained the model. This resulted in a better
+    training and validation loss. This is also evident in the semantic retrieval results below.
+    **Website Development:**
+    - We used Streamlit to develop the website. However, we faced a few issues with the size of the trained model, so we switched hosting to Hugging Face.
+    - The website loads the pre-trained model along with the recipe embeddings and the top-k retrieval function, and waits for the user to enter a query.
+    - The query is then processed by the model and the top-k recipes are returned.
     """)
+    st.markdown("**Hyperparameters and Training**")
     col1, col2 = st.columns(2)
     with col1:
         st.markdown("""
         - **Optimizer:** AdamW
         - **Epochs:** 3
         - **Hardware:** Google Colab A100 GPU (40 GB VRAM)
+        - **Training time:** ~30 minutes per run
         """)
     # Mathematical Formulations
+    st.subheader("Mathematical Formulations and Top-K Retrieval")
+    st.markdown("""**Query Embedding and Similarity Calculation**: We used the trained model weights to generate embeddings for the entire recipe corpus. We then used cosine similarity to calculate the similarity between the query and the recipe corpus:
+    once the user query is passed, we embedded the query using the trained model and used the cosine similarity formula below to retrieve the top-K
+    recipes. We then kept only the ones that have an average rating >= 3.0 and at least 5 ratings. Finally, we sorted the recipes by similarity and then by average rating.
+    """)
     st.latex(r"""
         \text{Similarity}(q, r_i) = \cos(\hat{q}, \hat{r}_i) = \frac{\hat{q} \cdot \hat{r}_i}{\|\hat{q}\|\|\hat{r}_i\|}
     """)
     st.markdown("Where $\\hat{q}$ is the BERT embedding of the query, and $\\hat{r}_i$ is the embedding of the i-th recipe.")
     # Results
+    st.subheader("Results")
+    st.markdown("**Training and Validation Loss**")
     results_data = {
         "Run": [1, 2, 3, 4],
         "Configuration": [
             "Raw, no cleaning/ordering",
             "Cleaned text, unordered",
+            "Cleaned text + single layer + dropout",
+            "Cleaned text + ordering"
         ],
         "Epoch-3 Train Loss": [0.0065, 0.0023, 0.0061, 0.0119],
         "Validation Loss": [0.1100, 0.0000, 0.0118, 0.0067]
     }
     st.table(results_data)
+    st.markdown("""Table 1: Training and Validation Loss for each run""")
     st.markdown("""
+    **Key Finding:** Run 4 (cleaned text + ordering) achieved the best balance
     between low validation loss and meaningful retrieval quality.
     """)
+    st.markdown("**Qualitative Retrieval Examples**")
     st.markdown("""
+    In this section, we will show how the results of the model differ between runs and how the model performs on different queries.
     **Query: "beef steak dinner"**
     - Run 1 (Raw): *to die for crock pot roast*, *crock pot chicken with black beans*
+    - Run 2 (Cleaned text, unordered): *aussie pepper steak   steak with creamy pepper sauce*
+    - Run 3 (Cleaned text + single layer + dropout): *balsamic rib eye steak with bleu cheese sauce*
     - Run 4 (Final): *grilled garlic steak dinner*, *classic beef steak au poivre*
     **Query: "chicken italian pasta"**
     - Run 1 (Raw): *to die for crock pot roast*, *crock pot chicken with black beans*
+    - Run 2 (Cleaned text, unordered): *baked chicken soup*
+    - Run 3 (Cleaned text + single layer + dropout): *absolute best ever lasagna*
     - Run 4 (Final): *creamy tuscan chicken pasta*, *italian chicken penne bake*
     **Query: "vegetarian salad healthy"**
+    - Run 1 (Raw): *to die for crock pot roast*
+    - Run 2 (Cleaned text, unordered): *avocado mandarin salad*
+    - Run 3 (Cleaned text + single layer + dropout): *black bean and sweet potato salad*
     - Run 4 (Final): *kale quinoa power salad*, *superfood spinach & berry salad*
     """)
     # Discussion and Conclusion
+    st.subheader("Discussion and Conclusion")
     st.markdown("""
     The experimental evidence underscores the importance of disciplined pre-processing when
+    adapting large language models to niche domains. The breakthrough came with ingredient-ordering
+    (protein → vegetables → grains → dairy → other) which supplied consistent positional signals. As we can see in the results,
+    the performance of the model improves with the addition of the single layer and dropout but the results are still not as good as the final run where
+    we added the ordering of the ingredients.
     **Key Achievements:**
     - End-to-end recipe recommendation system with semantic search
     - Meaningful semantic understanding of culinary content
     - Reproducible blueprint for domain-specific NLP applications
     **Limitations:**
+    - Relatively small private training set (12k samples) compared to public corpora
+    - Further pre-processing could be done to improve the results
     - Minimal hyperparameter search conducted
     - Single-machine deployment tested
+    - The model is not able to handle complex queries and it is not able to handle synonyms and antonyms.
     """)
     # References
+    st.subheader("References")
     st.markdown("""
+    [1] Vaswani et al., "Attention Is All You Need," NeurIPS, 2017.
+    [2] Devlin et al., "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding," NAACL-HLT, 2019.
+    [3] Reimers and Gurevych, "Sentence-BERT: Sentence Embeddings Using Siamese BERT-Networks," EMNLP-IJCNLP, 2019.
     [4] Hugging Face, "BERT Model Documentation," 2024.
     """)
     st.markdown("© 2025 CSE 555 Term Project. All rights reserved.")
 # Render the report
+render_layout(render_report)

utils/layout.py CHANGED Viewed

@@ -3,7 +3,7 @@ import streamlit as st
 def set_custom_page_config():
     st.set_page_config(
-        page_title="Smart Kitchen Assistant",
         layout="wide",
         initial_sidebar_state="expanded"
     )
@@ -11,15 +11,15 @@ def set_custom_page_config():
 def render_header():
     st.markdown("""
         <div class="project-header">
-            <h1>Smart Kitchen Assistant</h1>
-            <p>CSE555 Final Project — Group 5: Saksham & Ahmed</p>
         </div>
     """, unsafe_allow_html=True)
 def render_footer():
     st.markdown("""
         <div class="footer">
-            <p>Made with ❤️ by Saksham & Ahmed | CSE555 @ UB</p>
         </div>
     """, unsafe_allow_html=True)

 def set_custom_page_config():
     st.set_page_config(
+        page_title="Computer Vision and Natural Language Processing Project",
         layout="wide",
         initial_sidebar_state="expanded"
     )
 def render_header():
     st.markdown("""
         <div class="project-header">
+            <h1>Computer Vision and Natural Language Processing Project</h1>
+            <p>CSE555 Final Project — Group 5: Saksham Lakhera & Ahmed Zaher</p>
         </div>
     """, unsafe_allow_html=True)
 def render_footer():
     st.markdown("""
         <div class="footer">
+            <p>Made with ❤️ by Saksham & Zaher | CSE555 @ UB</p>
         </div>
     """, unsafe_allow_html=True)