Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,220 +1,156 @@
|
|
1 |
import streamlit as st
|
2 |
import google.generativeai as genai
|
3 |
import json
|
|
|
4 |
import pandas as pd
|
5 |
-
import
|
6 |
from typing import List, Dict, Any
|
7 |
import re
|
8 |
-
from
|
9 |
-
|
10 |
import pickle
|
11 |
-
import os
|
12 |
|
13 |
# Configure page
|
14 |
st.set_page_config(
|
15 |
-
page_title="π³
|
16 |
page_icon="π³",
|
17 |
layout="wide",
|
18 |
-
initial_sidebar_state="
|
19 |
)
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
class EnhancedRecipeRAG:
|
22 |
-
"""Enhanced Recipe RAG with Multiple Dataset
|
23 |
|
24 |
def __init__(self):
|
25 |
self.api_key = None
|
26 |
self.model = None
|
27 |
-
self.
|
28 |
-
self.vectorizer = None
|
29 |
-
self.recipe_vectors = None
|
30 |
self.dataset_loaded = False
|
31 |
-
|
32 |
-
def
|
33 |
-
"""
|
34 |
-
return [
|
35 |
-
{
|
36 |
-
"name": "Classic Scrambled Eggs",
|
37 |
-
"ingredients": ["eggs", "butter", "salt", "pepper", "milk"],
|
38 |
-
"category": "breakfast",
|
39 |
-
"cuisine": "american",
|
40 |
-
"instructions": ["Beat eggs with milk", "Heat butter in pan", "Add eggs and scramble gently"],
|
41 |
-
"prep_time": 5,
|
42 |
-
"cook_time": 5
|
43 |
-
},
|
44 |
-
# ... more sample recipes
|
45 |
-
]
|
46 |
-
|
47 |
-
def load_dataset_from_csv(self, file_path: str, format_type: str = "auto") -> bool:
|
48 |
-
"""Load recipes from CSV dataset"""
|
49 |
try:
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
self.recipe_database = self.parse_epicurious_format(df)
|
59 |
-
else:
|
60 |
-
self.recipe_database = self.parse_generic_format(df)
|
61 |
|
62 |
-
self.build_search_index()
|
63 |
self.dataset_loaded = True
|
64 |
return True
|
65 |
-
|
66 |
except Exception as e:
|
67 |
st.error(f"Error loading dataset: {str(e)}")
|
68 |
return False
|
69 |
|
70 |
-
def
|
71 |
-
"""
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
-
def
|
90 |
-
"""
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
"prep_time": row.get("minutes", 30),
|
100 |
-
"source": "Food.com"
|
101 |
-
}
|
102 |
-
if recipe["ingredients"]:
|
103 |
-
recipes.append(recipe)
|
104 |
-
except:
|
105 |
-
continue
|
106 |
-
return recipes
|
107 |
|
108 |
-
def
|
109 |
-
"""
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
"cuisine": row.get("cuisine", "unknown"),
|
119 |
-
"source": "Epicurious"
|
120 |
-
}
|
121 |
-
if recipe["ingredients"]:
|
122 |
-
recipes.append(recipe)
|
123 |
-
except:
|
124 |
-
continue
|
125 |
-
return recipes
|
126 |
|
127 |
-
def
|
128 |
-
"""
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
|
|
139 |
recipe = {
|
140 |
-
"name": row
|
141 |
-
"ingredients":
|
142 |
-
"
|
143 |
-
"
|
144 |
-
"source": "Custom Dataset"
|
145 |
}
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
if col.lower() in [name.lower() for name in possible_names]:
|
156 |
-
return col
|
157 |
-
return None
|
158 |
-
|
159 |
-
def parse_ingredients(self, ingredients_text: str) -> List[str]:
    """Parse an ingredient field into a list of clean ingredient strings.

    Accepted shapes, tried in this order:
      1. an already-parsed list/tuple,
      2. a JSON array or Python-literal list (text starting with ``[``),
      3. comma-separated text,
      4. newline-separated text,
      5. whitespace-separated text.

    Args:
        ingredients_text: Raw cell value. Non-string values such as ``None``
            or a float NaN are treated as "no ingredients".

    Returns:
        Ingredient strings with surrounding whitespace removed and empty
        entries dropped; ``[]`` when nothing usable is present.
    """
    import ast  # local import: only this parser and its sibling need it

    def _clean(items):
        # Normalise every entry to a stripped string and drop blanks.
        return [text for text in (str(item).strip() for item in items) if text]

    if isinstance(ingredients_text, (list, tuple)):
        return _clean(ingredients_text)
    if not isinstance(ingredients_text, str):
        # None, NaN and other non-text cells carry no ingredients.
        return []

    text = ingredients_text.strip()
    if not text:
        return []

    if text.startswith('['):
        # Try strict JSON first, then a Python literal (single-quoted lists).
        # literal_eval only evaluates literals, so it is safe on dataset text
        # and, unlike quote replacement, keeps apostrophes inside items intact.
        for parser in (json.loads, ast.literal_eval):
            try:
                parsed = parser(text)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                continue
            if isinstance(parsed, (list, tuple)):
                return _clean(parsed)

    # Plain-text fallbacks: commas take precedence over newlines.
    for separator in (',', '\n'):
        if separator in text:
            return _clean(text.split(separator))

    # Single ingredient or space-separated
    return text.split()
|
181 |
-
|
182 |
-
def parse_instructions(self, instructions_text: str) -> List[str]:
    """Parse a cooking-instructions field into an ordered list of steps.

    Accepted shapes: an already-parsed list/tuple, a JSON array or Python
    literal list (text starting with ``[``), or free text that is split on
    newlines and on step numbers such as ``1.`` / ``2.``.

    Args:
        instructions_text: Raw cell value. Non-string values such as ``None``
            or a float NaN are treated as "no instructions".

    Returns:
        Step strings with surrounding whitespace removed and empty entries
        dropped; ``[]`` when nothing usable is present.
    """
    import ast  # local import: only this parser and its sibling need it

    def _clean(items):
        # Normalise every entry to a stripped string and drop blanks.
        return [text for text in (str(item).strip() for item in items) if text]

    if isinstance(instructions_text, (list, tuple)):
        return _clean(instructions_text)
    if not isinstance(instructions_text, str):
        # None, NaN and other non-text cells carry no instructions.
        return []

    text = instructions_text.strip()
    if not text:
        return []

    if text.startswith('['):
        # Strict JSON first, then a Python literal; literal_eval keeps
        # apostrophes inside steps ("Don't overmix") intact.
        for parser in (json.loads, ast.literal_eval):
            try:
                parsed = parser(text)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                continue
            if isinstance(parsed, (list, tuple)):
                return _clean(parsed)

    # Split on newlines and on step markers. A marker is a 1-2 digit number at
    # the start of the text or after whitespace, followed by a dot that is not
    # part of a decimal, so "1.5 cups" and "350." stay inside their step.
    steps = re.split(r'(?:^|\s)\d{1,2}\.(?!\d)\s*|\n', text)
    return _clean(steps)
|
197 |
-
|
198 |
-
def build_search_index(self):
    """Fit a TF-IDF vectorizer over the loaded recipes.

    Each recipe becomes one text document made of its name, its ingredients
    and, when present, its category. The fitted vectorizer is stored on
    ``self.vectorizer`` and the resulting matrix on ``self.recipe_vectors``.
    Does nothing when ``self.recipe_database`` is empty.
    """
    recipes = self.recipe_database
    if not recipes:
        return

    def as_document(recipe):
        # Name plus space-joined ingredients, with the category appended
        # only when the recipe has a truthy one.
        document = f"{recipe['name']} {' '.join(recipe['ingredients'])}"
        if recipe.get('category'):
            document += f" {recipe['category']}"
        return document

    documents = [as_document(recipe) for recipe in recipes]

    # NOTE(review): TfidfVectorizer is presumably scikit-learn's; its import
    # line is not fully visible in this view - confirm.
    vectorizer_options = {
        "stop_words": 'english',
        "ngram_range": (1, 2),
        "max_features": 5000,
    }
    self.vectorizer = TfidfVectorizer(**vectorizer_options)
    self.recipe_vectors = self.vectorizer.fit_transform(documents)
|
218 |
|
219 |
def setup_gemini(self, api_key: str) -> bool:
|
220 |
"""Initialize Gemini API"""
|
@@ -228,85 +164,83 @@ class EnhancedRecipeRAG:
|
|
228 |
return False
|
229 |
|
230 |
def retrieve_relevant_recipes(self, user_ingredients: List[str], top_k: int = 5) -> List[Dict]:
|
231 |
-
"""Enhanced retrieval
|
232 |
-
if not self.dataset_loaded or not self.vectorizer:
|
233 |
-
return self.basic_ingredient_matching(user_ingredients)
|
234 |
-
|
235 |
-
# Create query vector
|
236 |
-
query = ' '.join(user_ingredients)
|
237 |
-
query_vector = self.vectorizer.transform([query])
|
238 |
-
|
239 |
-
# Calculate similarities
|
240 |
-
similarities = cosine_similarity(query_vector, self.recipe_vectors).flatten()
|
241 |
-
|
242 |
-
# Get top matches
|
243 |
-
top_indices = similarities.argsort()[-top_k:][::-1]
|
244 |
-
|
245 |
-
relevant_recipes = []
|
246 |
-
for idx in top_indices:
|
247 |
-
if similarities[idx] > 0.1: # Minimum similarity threshold
|
248 |
-
recipe = self.recipe_database[idx].copy()
|
249 |
-
recipe['similarity_score'] = similarities[idx]
|
250 |
-
relevant_recipes.append(recipe)
|
251 |
-
|
252 |
-
return relevant_recipes
|
253 |
-
|
254 |
-
def basic_ingredient_matching(self, user_ingredients: List[str]) -> List[Dict]:
|
255 |
-
"""Fallback method for simple ingredient matching"""
|
256 |
user_ingredients = [ing.lower().strip() for ing in user_ingredients]
|
257 |
relevant_recipes = []
|
258 |
|
259 |
-
for recipe in
|
260 |
-
recipe_ingredients = [ing.lower() for ing in recipe["ingredients"]]
|
|
|
|
|
261 |
overlap = len(set(user_ingredients) & set(recipe_ingredients))
|
262 |
|
263 |
if overlap > 0:
|
264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
relevant_recipes.append({
|
266 |
**recipe,
|
267 |
-
"relevance_score":
|
268 |
-
"matching_ingredients": overlap
|
|
|
269 |
})
|
270 |
|
|
|
271 |
relevant_recipes.sort(key=lambda x: x["relevance_score"], reverse=True)
|
272 |
-
return relevant_recipes[:
|
273 |
|
274 |
def generate_recipes_with_gemini(self, user_ingredients: List[str], relevant_recipes: List[Dict]) -> List[Dict]:
|
275 |
-
"""
|
|
|
276 |
ingredients_text = ", ".join(user_ingredients)
|
277 |
|
278 |
-
# Create
|
279 |
-
context_text = "
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
context_text += f" Style: {recipe['instructions'][0][:50]}...\n"
|
284 |
|
285 |
prompt = f"""
|
286 |
-
|
287 |
|
|
|
288 |
{context_text}
|
289 |
|
290 |
-
|
291 |
|
292 |
-
|
293 |
-
|
294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
|
296 |
-
|
297 |
{{
|
298 |
"recipes": [
|
299 |
{{
|
300 |
"name": "Recipe Name",
|
301 |
"ingredients_with_quantities": ["2 eggs", "1 tbsp butter"],
|
302 |
-
"instructions": ["Step 1", "Step 2"],
|
303 |
"prep_time": 10,
|
304 |
"cook_time": 15,
|
305 |
-
"tip": "
|
306 |
-
"cuisine": "
|
|
|
307 |
}}
|
308 |
]
|
309 |
}}
|
|
|
|
|
310 |
"""
|
311 |
|
312 |
try:
|
@@ -315,23 +249,29 @@ class EnhancedRecipeRAG:
|
|
315 |
|
316 |
json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
|
317 |
if json_match:
|
318 |
-
|
|
|
319 |
return recipes_data.get("recipes", [])
|
320 |
-
|
|
|
|
|
321 |
except Exception as e:
|
322 |
st.error(f"Error generating recipes: {str(e)}")
|
323 |
-
|
|
|
|
|
|
|
|
|
324 |
return []
|
325 |
|
326 |
def main():
|
327 |
-
st.markdown('<h1
|
328 |
-
st.markdown("### Powered by Large Recipe Datasets + Google Gemini Pro")
|
329 |
|
330 |
# Initialize enhanced RAG system
|
331 |
-
if '
|
332 |
-
st.session_state.
|
333 |
|
334 |
-
rag_system = st.session_state.
|
335 |
|
336 |
# Sidebar configuration
|
337 |
with st.sidebar:
|
@@ -339,127 +279,148 @@ def main():
|
|
339 |
|
340 |
# API Key
|
341 |
api_key = st.text_input("Google Gemini API Key", type="password")
|
342 |
-
if api_key
|
343 |
if rag_system.setup_gemini(api_key):
|
344 |
-
st.session_state.current_api_key = api_key
|
345 |
st.success("β
API configured!")
|
346 |
|
347 |
st.markdown("---")
|
348 |
|
349 |
-
# Dataset
|
350 |
-
st.header("
|
351 |
-
|
352 |
dataset_option = st.selectbox(
|
353 |
-
"Choose
|
354 |
-
["
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
)
|
356 |
|
357 |
-
|
358 |
-
|
|
|
|
|
|
|
|
|
|
|
359 |
if uploaded_file:
|
360 |
-
|
361 |
-
"
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
with st.spinner("Loading dataset..."):
|
367 |
-
# Save uploaded file temporarily
|
368 |
-
with open("temp_dataset.csv", "wb") as f:
|
369 |
-
f.write(uploaded_file.getbuffer())
|
370 |
-
|
371 |
-
if rag_system.load_dataset_from_csv("temp_dataset.csv", dataset_format):
|
372 |
-
st.success(f"β
Loaded {len(rag_system.recipe_database)} recipes!")
|
373 |
-
|
374 |
-
# Clean up
|
375 |
-
if os.path.exists("temp_dataset.csv"):
|
376 |
-
os.remove("temp_dataset.csv")
|
377 |
-
|
378 |
-
elif dataset_option == "Use Kaggle Dataset":
|
379 |
-
st.markdown("""
|
380 |
-
**Popular Datasets:**
|
381 |
-
- RecipeNLG: 2.2M recipes
|
382 |
-
- Food.com: 500K recipes
|
383 |
-
- Epicurious: 13K recipes
|
384 |
-
|
385 |
-
Download from Kaggle and upload above!
|
386 |
-
""")
|
387 |
|
388 |
-
# Dataset
|
389 |
if rag_system.dataset_loaded:
|
390 |
-
st.
|
391 |
-
|
392 |
-
|
393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
# Main interface
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
col1, col2 = st.columns([3, 1])
|
396 |
|
397 |
with col1:
|
398 |
ingredients_input = st.text_input(
|
399 |
-
"π₯ Enter
|
400 |
-
placeholder="
|
401 |
help="Separate ingredients with commas"
|
402 |
)
|
403 |
|
404 |
with col2:
|
|
|
405 |
generate_button = st.button("π Generate Recipes", type="primary", use_container_width=True)
|
406 |
|
407 |
-
#
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
user_ingredients = [ing.strip() for ing in ingredients_input.split(',') if ing.strip()]
|
418 |
|
419 |
-
with st.spinner("π€
|
420 |
-
# RAG
|
421 |
-
relevant_recipes = rag_system.retrieve_relevant_recipes(user_ingredients)
|
422 |
generated_recipes = rag_system.generate_recipes_with_gemini(user_ingredients, relevant_recipes)
|
423 |
|
424 |
-
# Display results
|
425 |
if generated_recipes:
|
426 |
st.markdown("## π½οΈ Your Personalized Recipes")
|
427 |
|
428 |
# Show retrieval context
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
st.write(f"**{recipe['name']}** (Match: {score:.2f})")
|
434 |
-
st.write(f"Ingredients: {', '.join(recipe['ingredients'][:5])}...")
|
435 |
|
436 |
# Display generated recipes
|
437 |
-
for i, recipe in enumerate(generated_recipes, 1):
|
438 |
-
with st.expander(f"
|
439 |
|
440 |
-
#
|
441 |
-
col1, col2, col3 = st.columns(
|
442 |
with col1:
|
443 |
-
st.
|
444 |
with col2:
|
445 |
-
st.
|
446 |
with col3:
|
447 |
-
|
448 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
449 |
|
450 |
-
# Ingredients
|
451 |
st.markdown("#### π Ingredients:")
|
452 |
-
|
453 |
-
|
|
|
454 |
|
455 |
-
# Instructions
|
456 |
st.markdown("#### π¨βπ³ Instructions:")
|
457 |
-
|
458 |
-
|
|
|
459 |
|
460 |
-
|
461 |
-
if
|
462 |
-
st.
|
|
|
|
|
|
|
|
|
463 |
|
464 |
if __name__ == "__main__":
|
465 |
main()
|
|
|
1 |
import streamlit as st
|
2 |
import google.generativeai as genai
|
3 |
import json
|
4 |
+
import os
|
5 |
import pandas as pd
|
6 |
+
import requests
|
7 |
from typing import List, Dict, Any
|
8 |
import re
|
9 |
+
from io import StringIO
|
10 |
+
import sqlite3
|
11 |
import pickle
|
|
|
12 |
|
13 |
# Configure page
|
14 |
st.set_page_config(
    # The egg-in-pan emoji was mis-decoded as "π³"; restored to real UTF-8.
    page_title="🍳 AI Recipe Generator Pro",
    page_icon="🍳",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS: centered, gradient-filled headline for the "main-header" class.
st.markdown("""<style>
.main-header {
    text-align: center;
    padding: 2rem 0;
    background: linear-gradient(90deg, #ff6b6b, #4ecdc4);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 3rem;
    font-weight: bold;
    margin-bottom: 2rem;
}
</style>""", unsafe_allow_html=True)
|
35 |
+
|
36 |
class EnhancedRecipeRAG:
|
37 |
+
"""Enhanced Recipe RAG with Multiple Dataset Options"""
|
38 |
|
39 |
def __init__(self):
|
40 |
self.api_key = None
|
41 |
self.model = None
|
42 |
+
self.recipe_db = []
|
|
|
|
|
43 |
self.dataset_loaded = False
|
44 |
+
|
45 |
+
def load_dataset_option(self, option: str) -> bool:
    """Load one of the built-in recipe datasets into ``self.recipe_db``.

    Args:
        option: Dataset key: ``"lightweight"``, ``"kaggle_ingredients"`` or
            ``"huggingface"``. ``"custom_csv"`` is not handled here.

    Returns:
        True when a dataset was loaded and ``self.dataset_loaded`` was set.
        False for ``"custom_csv"``, for an unrecognised key, or when the
        loader raised (the error is then reported through ``st.error``).
    """
    loaders = {
        "lightweight": self._load_lightweight_dataset,
        "kaggle_ingredients": self._load_kaggle_ingredients,
        "huggingface": self._load_huggingface_dataset,
    }
    loader = loaders.get(option)
    if loader is None:
        # "custom_csv" is handled separately; any other unknown key must not
        # flag the dataset as loaded when nothing was actually loaded.
        return False

    try:
        self.recipe_db = loader()
    except Exception as e:
        st.error(f"Error loading dataset: {str(e)}")
        return False

    self.dataset_loaded = True
    return True
|
62 |
|
63 |
+
def _load_lightweight_dataset(self) -> List[Dict]:
|
64 |
+
"""Curated lightweight dataset (~50KB) - Perfect for HF Spaces"""
|
65 |
+
return [
|
66 |
+
# Breakfast
|
67 |
+
{"name": "Classic Scrambled Eggs", "ingredients": ["eggs", "butter", "salt", "pepper", "milk"], "category": "breakfast", "cuisine": "american", "prep_time": 5, "cook_time": 5},
|
68 |
+
{"name": "French Toast", "ingredients": ["bread", "eggs", "milk", "sugar", "cinnamon", "butter"], "category": "breakfast", "cuisine": "french", "prep_time": 10, "cook_time": 8},
|
69 |
+
{"name": "Pancakes", "ingredients": ["flour", "eggs", "milk", "sugar", "baking powder", "butter"], "category": "breakfast", "cuisine": "american", "prep_time": 10, "cook_time": 15},
|
70 |
+
{"name": "Avocado Toast", "ingredients": ["avocado", "bread", "salt", "pepper", "lemon", "olive oil"], "category": "breakfast", "cuisine": "modern", "prep_time": 5, "cook_time": 2},
|
71 |
+
|
72 |
+
# Main Dishes
|
73 |
+
{"name": "Spaghetti Aglio e Olio", "ingredients": ["pasta", "garlic", "olive oil", "red pepper", "parsley", "parmesan"], "category": "main", "cuisine": "italian", "prep_time": 5, "cook_time": 15},
|
74 |
+
{"name": "Chicken Stir Fry", "ingredients": ["chicken", "vegetables", "soy sauce", "garlic", "ginger", "oil"], "category": "main", "cuisine": "asian", "prep_time": 15, "cook_time": 10},
|
75 |
+
{"name": "Beef Tacos", "ingredients": ["ground beef", "tortillas", "onion", "garlic", "cumin", "tomato"], "category": "main", "cuisine": "mexican", "prep_time": 10, "cook_time": 15},
|
76 |
+
{"name": "Fish and Chips", "ingredients": ["fish", "potatoes", "flour", "beer", "oil", "salt"], "category": "main", "cuisine": "british", "prep_time": 20, "cook_time": 15},
|
77 |
+
|
78 |
+
# Vegetarian
|
79 |
+
{"name": "Margherita Pizza", "ingredients": ["dough", "tomato sauce", "mozzarella", "basil", "olive oil"], "category": "main", "cuisine": "italian", "prep_time": 30, "cook_time": 12},
|
80 |
+
{"name": "Vegetable Curry", "ingredients": ["vegetables", "coconut milk", "curry powder", "onion", "garlic", "ginger"], "category": "main", "cuisine": "indian", "prep_time": 15, "cook_time": 25},
|
81 |
+
{"name": "Greek Salad", "ingredients": ["tomato", "cucumber", "feta", "olives", "onion", "olive oil"], "category": "salad", "cuisine": "greek", "prep_time": 10, "cook_time": 0},
|
82 |
+
|
83 |
+
# Soups
|
84 |
+
{"name": "Tomato Soup", "ingredients": ["tomatoes", "onion", "garlic", "broth", "cream", "basil"], "category": "soup", "cuisine": "american", "prep_time": 10, "cook_time": 20},
|
85 |
+
{"name": "Chicken Noodle Soup", "ingredients": ["chicken", "noodles", "carrots", "celery", "onion", "broth"], "category": "soup", "cuisine": "american", "prep_time": 15, "cook_time": 30},
|
86 |
+
|
87 |
+
# Desserts
|
88 |
+
{"name": "Chocolate Chip Cookies", "ingredients": ["flour", "butter", "sugar", "eggs", "chocolate chips", "vanilla"], "category": "dessert", "cuisine": "american", "prep_time": 15, "cook_time": 12},
|
89 |
+
{"name": "Tiramisu", "ingredients": ["ladyfingers", "coffee", "mascarpone", "eggs", "sugar", "cocoa"], "category": "dessert", "cuisine": "italian", "prep_time": 30, "cook_time": 0},
|
90 |
+
|
91 |
+
# International
|
92 |
+
{"name": "Pad Thai", "ingredients": ["rice noodles", "shrimp", "eggs", "bean sprouts", "peanuts", "lime"], "category": "main", "cuisine": "thai", "prep_time": 20, "cook_time": 10},
|
93 |
+
{"name": "Biryani", "ingredients": ["rice", "chicken", "yogurt", "spices", "onion", "saffron"], "category": "main", "cuisine": "indian", "prep_time": 45, "cook_time": 60},
|
94 |
+
{"name": "Sushi Rolls", "ingredients": ["sushi rice", "nori", "fish", "cucumber", "avocado", "soy sauce"], "category": "main", "cuisine": "japanese", "prep_time": 30, "cook_time": 20},
|
95 |
+
{"name": "Paella", "ingredients": ["rice", "seafood", "chicken", "saffron", "peppers", "beans"], "category": "main", "cuisine": "spanish", "prep_time": 20, "cook_time": 30},
|
96 |
+
|
97 |
+
# Quick & Easy
|
98 |
+
{"name": "Grilled Cheese", "ingredients": ["bread", "cheese", "butter"], "category": "quick", "cuisine": "american", "prep_time": 2, "cook_time": 5},
|
99 |
+
{"name": "Quesadilla", "ingredients": ["tortillas", "cheese", "chicken", "peppers"], "category": "quick", "cuisine": "mexican", "prep_time": 5, "cook_time": 8},
|
100 |
+
{"name": "Caesar Salad", "ingredients": ["romaine", "parmesan", "croutons", "caesar dressing"], "category": "salad", "cuisine": "roman", "prep_time": 10, "cook_time": 0}
|
101 |
+
]
|
102 |
|
103 |
+
def _load_kaggle_ingredients(self) -> List[Dict]:
|
104 |
+
"""Load from Kaggle Recipe Ingredients Dataset (if available)"""
|
105 |
+
# Placeholder - In production, you'd download and parse Kaggle dataset
|
106 |
+
kaggle_recipes = [
|
107 |
+
{"name": "Thai Green Curry", "ingredients": ["green curry paste", "coconut milk", "chicken", "thai basil"], "category": "main", "cuisine": "thai"},
|
108 |
+
{"name": "Mexican Pozole", "ingredients": ["hominy", "pork", "red chilies", "oregano"], "category": "soup", "cuisine": "mexican"},
|
109 |
+
{"name": "Indian Dal", "ingredients": ["lentils", "turmeric", "cumin", "ginger"], "category": "main", "cuisine": "indian"},
|
110 |
+
{"name": "Japanese Ramen", "ingredients": ["ramen noodles", "miso", "pork", "green onions"], "category": "main", "cuisine": "japanese"}
|
111 |
+
]
|
112 |
+
return self._load_lightweight_dataset() + kaggle_recipes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
+
def _load_huggingface_dataset(self) -> List[Dict]:
|
115 |
+
"""Load from HuggingFace dataset hub"""
|
116 |
+
# In production, use: from datasets import load_dataset
|
117 |
+
# dataset = load_dataset("mbien/recipe_nlg", split="train[:1000]") # Limit for memory
|
118 |
+
hf_recipes = [
|
119 |
+
{"name": "Mediterranean Quinoa Bowl", "ingredients": ["quinoa", "olives", "feta", "cucumber"], "category": "healthy", "cuisine": "mediterranean"},
|
120 |
+
{"name": "Korean Bibimbap", "ingredients": ["rice", "vegetables", "egg", "gochujang"], "category": "main", "cuisine": "korean"},
|
121 |
+
{"name": "Moroccan Tagine", "ingredients": ["chicken", "preserved lemons", "olives", "spices"], "category": "main", "cuisine": "moroccan"}
|
122 |
+
]
|
123 |
+
return self._load_lightweight_dataset() + hf_recipes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
+
def load_custom_csv(self, uploaded_file) -> bool:
    """Load a user-uploaded CSV into ``self.recipe_db``.

    The CSV must have ``name`` and ``ingredients`` columns; ``category`` and
    ``cuisine`` are optional and default to ``'unknown'`` when the column or
    the cell is missing. ``ingredients`` is a comma-separated string.

    Args:
        uploaded_file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        True when at least one recipe was loaded (``self.dataset_loaded`` is
        then set). False when required columns are missing, no row has a name
        and ingredients, or reading raised; the reason is reported through
        ``st.error``.
    """

    def _text_or_unknown(value) -> str:
        # Empty CSV cells arrive as NaN; treat them like a missing column.
        if value is None or pd.isna(value):
            return 'unknown'
        return str(value).strip() or 'unknown'

    try:
        df = pd.read_csv(uploaded_file)

        # Expected columns: name, ingredients, category, cuisine (optional)
        required_cols = ['name', 'ingredients']
        if not all(col in df.columns for col in required_cols):
            st.error("CSV must have 'name' and 'ingredients' columns")
            return False

        # Convert to our format
        recipes = []
        for row in df.to_dict('records'):
            raw_ingredients = row['ingredients']
            if isinstance(raw_ingredients, str):
                ingredients = [
                    part.strip() for part in raw_ingredients.split(',') if part.strip()
                ]
            else:
                # NaN or numeric cell: nothing that can be matched later.
                ingredients = []

            name = row['name']
            if pd.isna(name) or not ingredients:
                # Rows without a name or ingredients cannot be retrieved and
                # would break iteration over "ingredients" downstream.
                continue

            recipes.append({
                "name": str(name).strip(),
                "ingredients": ingredients,
                "category": _text_or_unknown(row.get('category')),
                "cuisine": _text_or_unknown(row.get('cuisine')),
            })

        if not recipes:
            st.error("CSV contains no rows with both a name and ingredients")
            return False

        self.recipe_db = recipes
        self.dataset_loaded = True
        return True

    except Exception as e:
        st.error(f"Error loading CSV: {str(e)}")
        return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
def setup_gemini(self, api_key: str) -> bool:
|
156 |
"""Initialize Gemini API"""
|
|
|
164 |
return False
|
165 |
|
166 |
def retrieve_relevant_recipes(self, user_ingredients: List[str], top_k: int = 5) -> List[Dict]:
    """Rank recipes in ``self.recipe_db`` by ingredient overlap with the user.

    Ingredients are compared case-insensitively after stripping whitespace.
    For every recipe sharing at least one ingredient the score is the mean of
    the Jaccard similarity and the coverage (share of the recipe's distinct
    ingredients the user has).

    Args:
        user_ingredients: Ingredient names supplied by the user.
        top_k: Maximum number of recipes to return; values <= 0 return ``[]``.

    Returns:
        Up to ``top_k`` copies of the matching recipe dicts, best first, each
        extended with ``relevance_score``, ``matching_ingredients`` and
        ``ingredient_coverage``.
    """
    # Normalise the query once; it does not change from recipe to recipe.
    wanted = {ing.lower().strip() for ing in user_ingredients if isinstance(ing, str)}
    wanted.discard("")
    if not wanted or top_k <= 0:
        return []

    relevant_recipes = []
    for recipe in self.recipe_db:
        raw_ingredients = recipe.get("ingredients")
        if not isinstance(raw_ingredients, (list, tuple, set)):
            # e.g. NaN from an empty CSV cell - nothing to match against.
            continue

        available = {str(ing).lower().strip() for ing in raw_ingredients}
        available.discard("")

        # Calculate multiple similarity metrics
        overlap = len(wanted & available)
        if overlap == 0:
            continue

        # Jaccard similarity
        jaccard = overlap / len(wanted | available)

        # Coverage (how much of the recipe ingredients we have); measured on
        # distinct ingredients so duplicates cannot deflate it.
        coverage = overlap / len(available)

        # Combined relevance score
        relevance_score = (jaccard * 0.5) + (coverage * 0.5)

        relevant_recipes.append({
            **recipe,
            "relevance_score": relevance_score,
            "matching_ingredients": overlap,
            "ingredient_coverage": coverage,
        })

    # Sort by relevance and return top matches
    relevant_recipes.sort(key=lambda x: x["relevance_score"], reverse=True)
    return relevant_recipes[:top_k]
|
197 |
|
198 |
def generate_recipes_with_gemini(self, user_ingredients: List[str], relevant_recipes: List[Dict]) -> List[Dict]:
|
199 |
+
"""Enhanced generation with better context"""
|
200 |
+
|
201 |
ingredients_text = ", ".join(user_ingredients)
|
202 |
|
203 |
+
# Create richer context from retrieved recipes
|
204 |
+
context_text = "\n".join([
|
205 |
+
f"- {r['name']} ({r.get('cuisine', 'unknown')} cuisine): {', '.join(r['ingredients'][:5])} - Category: {r.get('category', 'main')}"
|
206 |
+
for r in relevant_recipes
|
207 |
+
])
|
|
|
208 |
|
209 |
prompt = f"""
|
210 |
+
Based on available ingredients: {ingredients_text}
|
211 |
|
212 |
+
Context from similar recipes in database:
|
213 |
{context_text}
|
214 |
|
215 |
+
Generate 4 diverse, practical recipes using primarily the given ingredients. Include recipes from different cuisines and categories when possible.
|
216 |
|
217 |
+
For each recipe provide:
|
218 |
+
1. Recipe Name (creative and appetizing)
|
219 |
+
2. Complete ingredient list with quantities
|
220 |
+
3. Step-by-step instructions (numbered, clear)
|
221 |
+
4. Preparation time (realistic)
|
222 |
+
5. Cooking time (realistic)
|
223 |
+
6. A helpful cooking tip or variation
|
224 |
+
7. Cuisine type
|
225 |
+
8. Difficulty level (Easy/Medium/Hard)
|
226 |
|
227 |
+
Format as JSON:
|
228 |
{{
|
229 |
"recipes": [
|
230 |
{{
|
231 |
"name": "Recipe Name",
|
232 |
"ingredients_with_quantities": ["2 eggs", "1 tbsp butter"],
|
233 |
+
"instructions": ["Step 1: ...", "Step 2: ..."],
|
234 |
"prep_time": 10,
|
235 |
"cook_time": 15,
|
236 |
+
"tip": "Pro tip here",
|
237 |
+
"cuisine": "Italian",
|
238 |
+
"difficulty": "Easy"
|
239 |
}}
|
240 |
]
|
241 |
}}
|
242 |
+
|
243 |
+
Make recipes practical and achievable with the given ingredients.
|
244 |
"""
|
245 |
|
246 |
try:
|
|
|
249 |
|
250 |
json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
|
251 |
if json_match:
|
252 |
+
json_text = json_match.group()
|
253 |
+
recipes_data = json.loads(json_text)
|
254 |
return recipes_data.get("recipes", [])
|
255 |
+
else:
|
256 |
+
return self.parse_text_response(response_text)
|
257 |
+
|
258 |
except Exception as e:
|
259 |
st.error(f"Error generating recipes: {str(e)}")
|
260 |
+
return []
|
261 |
+
|
262 |
+
def parse_text_response(self, text: str) -> List[Dict]:
    """Fallback for model output with no JSON object; currently a stub.

    The ``text`` argument is ignored and an empty recipe list is returned.
    """
    # Same as before but with additional fields
    no_recipes: List[Dict] = list()
    return no_recipes
|
266 |
|
267 |
def main():
|
268 |
+
st.markdown('<h1 class="main-header">π³ AI Recipe Generator Pro</h1>', unsafe_allow_html=True)
|
|
|
269 |
|
270 |
# Initialize enhanced RAG system
|
271 |
+
if 'rag_system' not in st.session_state:
|
272 |
+
st.session_state.rag_system = EnhancedRecipeRAG()
|
273 |
|
274 |
+
rag_system = st.session_state.rag_system
|
275 |
|
276 |
# Sidebar configuration
|
277 |
with st.sidebar:
|
|
|
279 |
|
280 |
# API Key
|
281 |
api_key = st.text_input("Google Gemini API Key", type="password")
|
282 |
+
if api_key:
|
283 |
if rag_system.setup_gemini(api_key):
|
|
|
284 |
st.success("β
API configured!")
|
285 |
|
286 |
st.markdown("---")
|
287 |
|
288 |
+
# Dataset Selection
|
289 |
+
st.header("π Recipe Database")
|
|
|
290 |
dataset_option = st.selectbox(
|
291 |
+
"Choose dataset size:",
|
292 |
+
["lightweight", "kaggle_ingredients", "huggingface", "custom_csv"],
|
293 |
+
format_func=lambda x: {
|
294 |
+
"lightweight": "π Lightweight (50KB, ~25 recipes)",
|
295 |
+
"kaggle_ingredients": "π Kaggle Dataset (~100 recipes)",
|
296 |
+
"huggingface": "π€ HuggingFace Dataset (~200 recipes)",
|
297 |
+
"custom_csv": "π Upload Custom CSV"
|
298 |
+
}[x]
|
299 |
)
|
300 |
|
301 |
+
# Handle custom CSV upload
|
302 |
+
if dataset_option == "custom_csv":
|
303 |
+
uploaded_file = st.file_uploader(
|
304 |
+
"Upload Recipe CSV",
|
305 |
+
type=['csv'],
|
306 |
+
help="Columns: name, ingredients, category (optional), cuisine (optional)"
|
307 |
+
)
|
308 |
if uploaded_file:
|
309 |
+
if rag_system.load_custom_csv(uploaded_file):
|
310 |
+
st.success(f"β
Loaded {len(rag_system.recipe_db)} recipes!")
|
311 |
+
else:
|
312 |
+
if st.button("Load Dataset"):
|
313 |
+
if rag_system.load_dataset_option(dataset_option):
|
314 |
+
st.success(f"β
Loaded {len(rag_system.recipe_db)} recipes!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
|
316 |
+
# Dataset info
|
317 |
if rag_system.dataset_loaded:
|
318 |
+
st.info(f"π Database: {len(rag_system.recipe_db)} recipes loaded")
|
319 |
+
|
320 |
+
# Show dataset stats
|
321 |
+
if rag_system.recipe_db:
|
322 |
+
categories = {}
|
323 |
+
cuisines = {}
|
324 |
+
for recipe in rag_system.recipe_db:
|
325 |
+
cat = recipe.get('category', 'unknown')
|
326 |
+
cuisine = recipe.get('cuisine', 'unknown')
|
327 |
+
categories[cat] = categories.get(cat, 0) + 1
|
328 |
+
cuisines[cuisine] = cuisines.get(cuisine, 0) + 1
|
329 |
+
|
330 |
+
with st.expander("π Dataset Statistics"):
|
331 |
+
st.write("**Categories:**")
|
332 |
+
for cat, count in categories.items():
|
333 |
+
st.write(f"β’ {cat}: {count}")
|
334 |
+
|
335 |
+
st.write("**Cuisines:**")
|
336 |
+
for cuisine, count in cuisines.items():
|
337 |
+
st.write(f"β’ {cuisine}: {count}")
|
338 |
+
|
339 |
# Main interface
|
340 |
+
if not rag_system.dataset_loaded:
|
341 |
+
st.warning("β οΈ Please load a recipe dataset from the sidebar first!")
|
342 |
+
return
|
343 |
+
|
344 |
+
if not api_key:
|
345 |
+
st.warning("β οΈ Please enter your Google Gemini API key in the sidebar!")
|
346 |
+
return
|
347 |
+
|
348 |
+
# Recipe generation interface
|
349 |
col1, col2 = st.columns([3, 1])
|
350 |
|
351 |
with col1:
|
352 |
ingredients_input = st.text_input(
|
353 |
+
"π₯ Enter your ingredients:",
|
354 |
+
placeholder="e.g., chicken, rice, onion, garlic, tomato",
|
355 |
help="Separate ingredients with commas"
|
356 |
)
|
357 |
|
358 |
with col2:
|
359 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
360 |
generate_button = st.button("π Generate Recipes", type="primary", use_container_width=True)
|
361 |
|
362 |
+
# Advanced options
|
363 |
+
with st.expander("π§ Advanced Options"):
|
364 |
+
col1, col2 = st.columns(2)
|
365 |
+
with col1:
|
366 |
+
num_recipes = st.slider("Number of recipes to generate:", 2, 6, 4)
|
367 |
+
with col2:
|
368 |
+
retrieval_k = st.slider("Similar recipes to consider:", 3, 10, 5)
|
369 |
+
|
370 |
+
if generate_button and ingredients_input.strip():
|
|
|
371 |
user_ingredients = [ing.strip() for ing in ingredients_input.split(',') if ing.strip()]
|
372 |
|
373 |
+
with st.spinner("π€ AI is crafting personalized recipes..."):
|
374 |
+
# RAG pipeline
|
375 |
+
relevant_recipes = rag_system.retrieve_relevant_recipes(user_ingredients, retrieval_k)
|
376 |
generated_recipes = rag_system.generate_recipes_with_gemini(user_ingredients, relevant_recipes)
|
377 |
|
|
|
378 |
if generated_recipes:
|
379 |
st.markdown("## π½οΈ Your Personalized Recipes")
|
380 |
|
381 |
# Show retrieval context
|
382 |
+
with st.expander("π Similar Recipes Found (RAG Context)"):
|
383 |
+
for i, recipe in enumerate(relevant_recipes[:3], 1):
|
384 |
+
st.write(f"**{i}. {recipe['name']}** ({recipe.get('cuisine', 'unknown')} cuisine)")
|
385 |
+
st.write(f" Relevance: {recipe['relevance_score']:.2f} | Matching ingredients: {recipe['matching_ingredients']}")
|
|
|
|
|
386 |
|
387 |
# Display generated recipes
|
388 |
+
for i, recipe in enumerate(generated_recipes[:num_recipes], 1):
|
389 |
+
with st.expander(f"π³ Recipe {i}: {recipe.get('name', 'Delicious Recipe')}", expanded=i==1):
|
390 |
|
391 |
+
# Enhanced header with more info
|
392 |
+
col1, col2, col3, col4 = st.columns(4)
|
393 |
with col1:
|
394 |
+
st.markdown(f"**β±οΈ Prep:** {recipe.get('prep_time', 10)} mins")
|
395 |
with col2:
|
396 |
+
st.markdown(f"**π₯ Cook:** {recipe.get('cook_time', 15)} mins")
|
397 |
with col3:
|
398 |
+
st.markdown(f"**π Cuisine:** {recipe.get('cuisine', 'International')}")
|
399 |
+
with col4:
|
400 |
+
st.markdown(f"**π Difficulty:** {recipe.get('difficulty', 'Easy')}")
|
401 |
+
|
402 |
+
st.markdown("---")
|
403 |
+
|
404 |
+
# Rest of the recipe display (ingredients, instructions, tips)
|
405 |
+
# Same as before...
|
406 |
|
|
|
407 |
st.markdown("#### π Ingredients:")
|
408 |
+
ingredients = recipe.get('ingredients_with_quantities', [])
|
409 |
+
for ingredient in ingredients:
|
410 |
+
st.markdown(f"β’ {ingredient}")
|
411 |
|
|
|
412 |
st.markdown("#### π¨βπ³ Instructions:")
|
413 |
+
instructions = recipe.get('instructions', [])
|
414 |
+
for j, instruction in enumerate(instructions, 1):
|
415 |
+
st.markdown(f"**{j}.** {instruction}")
|
416 |
|
417 |
+
tip = recipe.get('tip', 'Enjoy your cooking!')
|
418 |
+
if tip:
|
419 |
+
st.markdown(f"""
|
420 |
+
<div class="tip-box" style="background: #fff3cd; padding: 1rem; border-radius: 5px; margin-top: 1rem;">
|
421 |
+
<strong>π‘ Pro Tip:</strong> {tip}
|
422 |
+
</div>
|
423 |
+
""", unsafe_allow_html=True)
|
424 |
|
425 |
if __name__ == "__main__":
|
426 |
main()
|