Spaces:
Sleeping
Sleeping
import streamlit as st | |
import google.generativeai as genai | |
import json | |
import os | |
import pandas as pd | |
import requests | |
from typing import List, Dict, Any | |
import re | |
from io import StringIO | |
import sqlite3 | |
import pickle | |
# Page-level Streamlit settings: browser tab title/icon, wide layout, sidebar open.
# NOTE(review): the icon glyphs in the UI strings look mis-encoded (probably
# emoji decoded with the wrong charset) - confirm against the deployed file.
st.set_page_config(
    page_title="π³ AI Recipe Generator Pro",
    page_icon="π³",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Stylesheet for the gradient-text page heading (the "main-header" class).
_MAIN_HEADER_CSS = """<style>
.main-header {
    text-align: center;
    padding: 2rem 0;
    background: linear-gradient(90deg, #ff6b6b, #4ecdc4);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 3rem;
    font-weight: bold;
    margin-bottom: 2rem;
}
</style>"""

# Raw HTML must be explicitly allowed for the <style> tag to be injected.
st.markdown(_MAIN_HEADER_CSS, unsafe_allow_html=True)
class EnhancedRecipeRAG:
    """Enhanced Recipe RAG with Multiple Dataset Options.

    Holds an in-memory list of recipe dicts (``recipe_db``), scores them
    against a user's ingredient list, and feeds the best matches to a Gemini
    model as context for generating new recipes.

    Every recipe dict has at least ``name`` (str) and ``ingredients``
    (list of str); ``category``, ``cuisine``, ``prep_time`` and ``cook_time``
    are optional.
    """

    def __init__(self):
        # Gemini credentials / client; both stay None until setup_gemini() succeeds.
        self.api_key = None
        self.model = None
        # Recipes used for retrieval, and whether a dataset has been loaded.
        self.recipe_db = []
        self.dataset_loaded = False

    def load_dataset_option(self, option: str) -> bool:
        """Load one of the built-in datasets into ``recipe_db``.

        Args:
            option: "lightweight", "kaggle_ingredients" or "huggingface".

        Returns:
            True when the dataset was loaded. False for "custom_csv" (which is
            loaded through load_custom_csv instead), for any unrecognised
            option, or when the loader raised. On False the previously loaded
            data and the ``dataset_loaded`` flag are left untouched.
        """
        loaders = {
            "lightweight": self._load_lightweight_dataset,
            "kaggle_ingredients": self._load_kaggle_ingredients,
            "huggingface": self._load_huggingface_dataset,
        }
        loader = loaders.get(option)
        if loader is None:
            # Previously an unknown option fell through and flagged the
            # dataset as loaded even though nothing had been read.
            return False
        try:
            self.recipe_db = loader()
        except Exception as e:
            st.error(f"Error loading dataset: {str(e)}")
            return False
        self.dataset_loaded = True
        return True

    def _load_lightweight_dataset(self) -> List[Dict]:
        """Curated lightweight dataset (~50KB) - Perfect for HF Spaces"""
        return [
            # Breakfast
            {"name": "Classic Scrambled Eggs", "ingredients": ["eggs", "butter", "salt", "pepper", "milk"], "category": "breakfast", "cuisine": "american", "prep_time": 5, "cook_time": 5},
            {"name": "French Toast", "ingredients": ["bread", "eggs", "milk", "sugar", "cinnamon", "butter"], "category": "breakfast", "cuisine": "french", "prep_time": 10, "cook_time": 8},
            {"name": "Pancakes", "ingredients": ["flour", "eggs", "milk", "sugar", "baking powder", "butter"], "category": "breakfast", "cuisine": "american", "prep_time": 10, "cook_time": 15},
            {"name": "Avocado Toast", "ingredients": ["avocado", "bread", "salt", "pepper", "lemon", "olive oil"], "category": "breakfast", "cuisine": "modern", "prep_time": 5, "cook_time": 2},
            # Main Dishes
            {"name": "Spaghetti Aglio e Olio", "ingredients": ["pasta", "garlic", "olive oil", "red pepper", "parsley", "parmesan"], "category": "main", "cuisine": "italian", "prep_time": 5, "cook_time": 15},
            {"name": "Chicken Stir Fry", "ingredients": ["chicken", "vegetables", "soy sauce", "garlic", "ginger", "oil"], "category": "main", "cuisine": "asian", "prep_time": 15, "cook_time": 10},
            {"name": "Beef Tacos", "ingredients": ["ground beef", "tortillas", "onion", "garlic", "cumin", "tomato"], "category": "main", "cuisine": "mexican", "prep_time": 10, "cook_time": 15},
            {"name": "Fish and Chips", "ingredients": ["fish", "potatoes", "flour", "beer", "oil", "salt"], "category": "main", "cuisine": "british", "prep_time": 20, "cook_time": 15},
            # Vegetarian
            {"name": "Margherita Pizza", "ingredients": ["dough", "tomato sauce", "mozzarella", "basil", "olive oil"], "category": "main", "cuisine": "italian", "prep_time": 30, "cook_time": 12},
            {"name": "Vegetable Curry", "ingredients": ["vegetables", "coconut milk", "curry powder", "onion", "garlic", "ginger"], "category": "main", "cuisine": "indian", "prep_time": 15, "cook_time": 25},
            {"name": "Greek Salad", "ingredients": ["tomato", "cucumber", "feta", "olives", "onion", "olive oil"], "category": "salad", "cuisine": "greek", "prep_time": 10, "cook_time": 0},
            # Soups
            {"name": "Tomato Soup", "ingredients": ["tomatoes", "onion", "garlic", "broth", "cream", "basil"], "category": "soup", "cuisine": "american", "prep_time": 10, "cook_time": 20},
            {"name": "Chicken Noodle Soup", "ingredients": ["chicken", "noodles", "carrots", "celery", "onion", "broth"], "category": "soup", "cuisine": "american", "prep_time": 15, "cook_time": 30},
            # Desserts
            {"name": "Chocolate Chip Cookies", "ingredients": ["flour", "butter", "sugar", "eggs", "chocolate chips", "vanilla"], "category": "dessert", "cuisine": "american", "prep_time": 15, "cook_time": 12},
            {"name": "Tiramisu", "ingredients": ["ladyfingers", "coffee", "mascarpone", "eggs", "sugar", "cocoa"], "category": "dessert", "cuisine": "italian", "prep_time": 30, "cook_time": 0},
            # International
            {"name": "Pad Thai", "ingredients": ["rice noodles", "shrimp", "eggs", "bean sprouts", "peanuts", "lime"], "category": "main", "cuisine": "thai", "prep_time": 20, "cook_time": 10},
            {"name": "Biryani", "ingredients": ["rice", "chicken", "yogurt", "spices", "onion", "saffron"], "category": "main", "cuisine": "indian", "prep_time": 45, "cook_time": 60},
            {"name": "Sushi Rolls", "ingredients": ["sushi rice", "nori", "fish", "cucumber", "avocado", "soy sauce"], "category": "main", "cuisine": "japanese", "prep_time": 30, "cook_time": 20},
            {"name": "Paella", "ingredients": ["rice", "seafood", "chicken", "saffron", "peppers", "beans"], "category": "main", "cuisine": "spanish", "prep_time": 20, "cook_time": 30},
            # Quick & Easy
            {"name": "Grilled Cheese", "ingredients": ["bread", "cheese", "butter"], "category": "quick", "cuisine": "american", "prep_time": 2, "cook_time": 5},
            {"name": "Quesadilla", "ingredients": ["tortillas", "cheese", "chicken", "peppers"], "category": "quick", "cuisine": "mexican", "prep_time": 5, "cook_time": 8},
            {"name": "Caesar Salad", "ingredients": ["romaine", "parmesan", "croutons", "caesar dressing"], "category": "salad", "cuisine": "roman", "prep_time": 10, "cook_time": 0}
        ]

    def _load_kaggle_ingredients(self) -> List[Dict]:
        """Return the lightweight dataset plus a few hard-coded extra recipes.

        Placeholder: no Kaggle data is downloaded or parsed here. The extra
        entries carry no prep_time / cook_time.
        """
        kaggle_recipes = [
            {"name": "Thai Green Curry", "ingredients": ["green curry paste", "coconut milk", "chicken", "thai basil"], "category": "main", "cuisine": "thai"},
            {"name": "Mexican Pozole", "ingredients": ["hominy", "pork", "red chilies", "oregano"], "category": "soup", "cuisine": "mexican"},
            {"name": "Indian Dal", "ingredients": ["lentils", "turmeric", "cumin", "ginger"], "category": "main", "cuisine": "indian"},
            {"name": "Japanese Ramen", "ingredients": ["ramen noodles", "miso", "pork", "green onions"], "category": "main", "cuisine": "japanese"}
        ]
        return self._load_lightweight_dataset() + kaggle_recipes

    def _load_huggingface_dataset(self) -> List[Dict]:
        """Return the lightweight dataset plus a few hard-coded extra recipes.

        Placeholder: nothing is fetched from the HuggingFace hub here. The
        extra entries carry no prep_time / cook_time.
        """
        # In production, use: from datasets import load_dataset
        # dataset = load_dataset("mbien/recipe_nlg", split="train[:1000]")  # Limit for memory
        hf_recipes = [
            {"name": "Mediterranean Quinoa Bowl", "ingredients": ["quinoa", "olives", "feta", "cucumber"], "category": "healthy", "cuisine": "mediterranean"},
            {"name": "Korean Bibimbap", "ingredients": ["rice", "vegetables", "egg", "gochujang"], "category": "main", "cuisine": "korean"},
            {"name": "Moroccan Tagine", "ingredients": ["chicken", "preserved lemons", "olives", "spices"], "category": "main", "cuisine": "moroccan"}
        ]
        return self._load_lightweight_dataset() + hf_recipes

    @staticmethod
    def _clean_cell(value: Any, default: str) -> str:
        """Turn one CSV cell into stripped text, using *default* when it is empty or missing."""
        if isinstance(value, str):
            text = value.strip()
        elif value is None or pd.isna(value):
            # Empty CSV cells arrive as NaN; treat them as missing, not as "nan".
            text = ""
        else:
            text = str(value).strip()
        return text or default

    @classmethod
    def _split_ingredients(cls, value: Any) -> List[str]:
        """Split a comma-separated ingredients cell into trimmed, non-empty names."""
        text = cls._clean_cell(value, "")
        return [part.strip() for part in text.split(',') if part.strip()]

    def load_custom_csv(self, uploaded_file) -> bool:
        """Load user-uploaded CSV dataset.

        The CSV must have ``name`` and ``ingredients`` columns (ingredients as
        a comma-separated string); ``category`` and ``cuisine`` are optional
        and default to 'unknown'. Rows without a name or without any
        ingredient are skipped.

        Args:
            uploaded_file: anything ``pandas.read_csv`` accepts (path or
                file-like object).

        Returns:
            True when at least one recipe was loaded; False otherwise (an
            error is shown and the previous dataset is kept).
        """
        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error loading CSV: {str(e)}")
            return False

        required_cols = ['name', 'ingredients']
        if not all(col in df.columns for col in required_cols):
            st.error("CSV must have 'name' and 'ingredients' columns")
            return False

        recipes = []
        for row in df.to_dict("records"):
            name = self._clean_cell(row.get('name'), "")
            ingredients = self._split_ingredients(row.get('ingredients'))
            if not name or not ingredients:
                # A recipe without a name or ingredients can never be
                # retrieved or displayed meaningfully.
                continue
            recipes.append({
                "name": name,
                "ingredients": ingredients,
                "category": self._clean_cell(row.get('category'), 'unknown'),
                "cuisine": self._clean_cell(row.get('cuisine'), 'unknown'),
            })

        if not recipes:
            st.error("Error loading CSV: no rows with both a name and ingredients")
            return False

        self.recipe_db = recipes
        self.dataset_loaded = True
        return True

    def setup_gemini(self, api_key: str) -> bool:
        """Configure the Gemini client and create the generation model.

        Args:
            api_key: Google Gemini API key.

        Returns:
            True when configuration and model creation did not raise;
            False otherwise (the error is shown in the UI).
        """
        try:
            genai.configure(api_key=api_key)
            self.model = genai.GenerativeModel('gemini-1.5-flash')
            self.api_key = api_key
            return True
        except Exception as e:
            st.error(f"Failed to initialize Gemini API: {str(e)}")
            return False

    def retrieve_relevant_recipes(self, user_ingredients: List[str], top_k: int = 5) -> List[Dict]:
        """Rank stored recipes by ingredient overlap with *user_ingredients*.

        Matching is exact on lower-cased, trimmed ingredient names. Each
        recipe sharing at least one ingredient is scored as the mean of the
        Jaccard similarity and the coverage (share of the recipe's distinct
        ingredients the user has).

        Args:
            user_ingredients: ingredient names supplied by the user.
            top_k: maximum number of recipes to return.

        Returns:
            Up to *top_k* copies of the matching recipe dicts, best first,
            each extended with ``relevance_score``, ``matching_ingredients``
            and ``ingredient_coverage``.
        """
        # Normalise once, outside the loop; blank entries must never match.
        wanted = {str(ing).lower().strip() for ing in user_ingredients}
        wanted.discard("")
        if not wanted or top_k <= 0:
            return []

        scored = []
        for recipe in self.recipe_db:
            available = {str(ing).lower().strip() for ing in recipe["ingredients"]}
            available.discard("")
            overlap = len(wanted & available)
            if overlap == 0:
                continue
            jaccard = overlap / len(wanted | available)
            # Distinct ingredients only, so duplicates cannot deflate coverage.
            coverage = overlap / len(available)
            scored.append({
                **recipe,
                "relevance_score": (jaccard * 0.5) + (coverage * 0.5),
                "matching_ingredients": overlap,
                "ingredient_coverage": coverage,
            })

        scored.sort(key=lambda entry: entry["relevance_score"], reverse=True)
        return scored[:top_k]

    @staticmethod
    def _build_prompt(user_ingredients: List[str], relevant_recipes: List[Dict], num_recipes: int) -> str:
        """Compose the generation prompt from the user's ingredients and the retrieved recipes."""
        ingredients_text = ", ".join(user_ingredients)
        # One context line per retrieved recipe, limited to its first five ingredients.
        context_text = "\n".join([
            f"- {r['name']} ({r.get('cuisine', 'unknown')} cuisine): {', '.join(r['ingredients'][:5])} - Category: {r.get('category', 'main')}"
            for r in relevant_recipes
        ])
        return f"""
        Based on available ingredients: {ingredients_text}
        Context from similar recipes in database:
        {context_text}
        Generate {num_recipes} diverse, practical recipes using primarily the given ingredients. Include recipes from different cuisines and categories when possible.
        For each recipe provide:
        1. Recipe Name (creative and appetizing)
        2. Complete ingredient list with quantities
        3. Step-by-step instructions (numbered, clear)
        4. Preparation time (realistic)
        5. Cooking time (realistic)
        6. A helpful cooking tip or variation
        7. Cuisine type
        8. Difficulty level (Easy/Medium/Hard)
        Format as JSON:
        {{
        "recipes": [
        {{
        "name": "Recipe Name",
        "ingredients_with_quantities": ["2 eggs", "1 tbsp butter"],
        "instructions": ["Step 1: ...", "Step 2: ..."],
        "prep_time": 10,
        "cook_time": 15,
        "tip": "Pro tip here",
        "cuisine": "Italian",
        "difficulty": "Easy"
        }}
        ]
        }}
        Make recipes practical and achievable with the given ingredients.
        """

    @staticmethod
    def _parse_recipes_json(response_text: str) -> List[Dict]:
        """Extract the ``recipes`` list from the first-to-last brace span of *response_text*.

        Returns an empty list when there is no brace span, the span is not
        valid JSON, or it holds no ``recipes`` list. Non-dict entries are
        dropped.
        """
        # Greedy match from the first "{" to the last "}" so any surrounding
        # text (e.g. a markdown code fence) is ignored.
        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
        if not json_match:
            return []
        try:
            recipes_data = json.loads(json_match.group())
        except json.JSONDecodeError:
            return []
        recipes = recipes_data.get("recipes", []) if isinstance(recipes_data, dict) else []
        if not isinstance(recipes, list):
            return []
        return [recipe for recipe in recipes if isinstance(recipe, dict)]

    def generate_recipes_with_gemini(self, user_ingredients: List[str], relevant_recipes: List[Dict], num_recipes: int = 4) -> List[Dict]:
        """Ask the Gemini model for new recipes based on the user's ingredients.

        Args:
            user_ingredients: ingredient names supplied by the user.
            relevant_recipes: retrieved recipes used as prompt context.
            num_recipes: how many recipes the prompt asks for (default 4).

        Returns:
            The recipe dicts parsed from the model's JSON answer, or whatever
            parse_text_response yields when no JSON could be parsed. An empty
            list (with an error shown in the UI) when the model is not
            configured, the call fails, or nothing could be parsed.
        """
        if self.model is None:
            st.error("Error generating recipes: Gemini API is not configured")
            return []

        prompt = self._build_prompt(user_ingredients, relevant_recipes, max(1, int(num_recipes)))
        # Broad catch on purpose: this is the UI boundary, and any failure of
        # the API call or of reading the response text should become a message.
        try:
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()
        except Exception as e:
            st.error(f"Error generating recipes: {str(e)}")
            return []

        recipes = self._parse_recipes_json(response_text) or self.parse_text_response(response_text)
        if not recipes:
            st.error("Error generating recipes: no recipes could be parsed from the model response")
        return recipes

    def parse_text_response(self, text: str) -> List[Dict]:
        """Fallback parser for non-JSON model output.

        Currently a stub: it ignores *text* and always returns an empty list.
        """
        return []
def _render_dataset_stats(recipe_db: List[Dict]) -> None:
    """Show per-category and per-cuisine recipe counts inside an expander."""
    from collections import Counter

    categories = Counter(recipe.get('category', 'unknown') for recipe in recipe_db)
    cuisines = Counter(recipe.get('cuisine', 'unknown') for recipe in recipe_db)
    with st.expander("π Dataset Statistics"):
        st.write("**Categories:**")
        for cat, count in categories.items():
            st.write(f"β’ {cat}: {count}")
        st.write("**Cuisines:**")
        for cuisine, count in cuisines.items():
            st.write(f"β’ {cuisine}: {count}")


def _render_sidebar(rag_system: "EnhancedRecipeRAG") -> str:
    """Draw the sidebar (API key, dataset choice, dataset info) and return the entered API key."""
    with st.sidebar:
        st.header("π§ Configuration")
        # API Key
        api_key = st.text_input("Google Gemini API Key", type="password")
        if api_key and rag_system.setup_gemini(api_key):
            st.success("β API configured!")
        st.markdown("---")

        # Dataset Selection
        st.header("π Recipe Database")
        dataset_labels = {
            "lightweight": "π Lightweight (50KB, ~25 recipes)",
            "kaggle_ingredients": "π Kaggle Dataset (~100 recipes)",
            "huggingface": "π€ HuggingFace Dataset (~200 recipes)",
            "custom_csv": "π Upload Custom CSV",
        }
        dataset_option = st.selectbox(
            "Choose dataset size:",
            list(dataset_labels),
            format_func=dataset_labels.get,
        )

        if dataset_option == "custom_csv":
            # A custom CSV is loaded as soon as a file is uploaded.
            uploaded_file = st.file_uploader(
                "Upload Recipe CSV",
                type=['csv'],
                help="Columns: name, ingredients, category (optional), cuisine (optional)"
            )
            if uploaded_file and rag_system.load_custom_csv(uploaded_file):
                st.success(f"β Loaded {len(rag_system.recipe_db)} recipes!")
        elif st.button("Load Dataset") and rag_system.load_dataset_option(dataset_option):
            st.success(f"β Loaded {len(rag_system.recipe_db)} recipes!")

        # Dataset info
        if rag_system.dataset_loaded:
            st.info(f"π Database: {len(rag_system.recipe_db)} recipes loaded")
            if rag_system.recipe_db:
                _render_dataset_stats(rag_system.recipe_db)
    return api_key


def _render_recipe(index: int, recipe: Dict) -> None:
    """Draw one generated recipe (times, cuisine, difficulty, ingredients, steps, tip)."""
    import html

    with st.expander(f"π³ Recipe {index}: {recipe.get('name', 'Delicious Recipe')}", expanded=(index == 1)):
        prep_col, cook_col, cuisine_col, difficulty_col = st.columns(4)
        with prep_col:
            st.markdown(f"**β±οΈ Prep:** {recipe.get('prep_time', 10)} mins")
        with cook_col:
            st.markdown(f"**π₯ Cook:** {recipe.get('cook_time', 15)} mins")
        with cuisine_col:
            st.markdown(f"**π Cuisine:** {recipe.get('cuisine', 'International')}")
        with difficulty_col:
            st.markdown(f"**π Difficulty:** {recipe.get('difficulty', 'Easy')}")
        st.markdown("---")

        st.markdown("#### π Ingredients:")
        for ingredient in recipe.get('ingredients_with_quantities', []):
            st.markdown(f"β’ {ingredient}")

        st.markdown("#### π¨βπ³ Instructions:")
        for step_number, instruction in enumerate(recipe.get('instructions', []), 1):
            st.markdown(f"**{step_number}.** {instruction}")

        tip = recipe.get('tip', 'Enjoy your cooking!')
        if tip:
            # The tip is model-generated text rendered as raw HTML, so it is
            # escaped to keep stray markup or scripts from being injected.
            safe_tip = html.escape(str(tip))
            st.markdown(
                "\n"
                '<div class="tip-box" style="background: #fff3cd; padding: 1rem; border-radius: 5px; margin-top: 1rem;">\n'
                f"<strong>π‘ Pro Tip:</strong> {safe_tip}\n"
                "</div>\n",
                unsafe_allow_html=True,
            )


def main():
    """Run the Streamlit page: sidebar setup, ingredient input, retrieval and recipe display."""
    st.markdown('<h1 class="main-header">π³ AI Recipe Generator Pro</h1>', unsafe_allow_html=True)

    # One RAG instance per browser session, kept across Streamlit reruns.
    if 'rag_system' not in st.session_state:
        st.session_state.rag_system = EnhancedRecipeRAG()
    rag_system = st.session_state.rag_system

    api_key = _render_sidebar(rag_system)

    # Main interface: both a dataset and an API key are required.
    if not rag_system.dataset_loaded:
        st.warning("β οΈ Please load a recipe dataset from the sidebar first!")
        return
    if not api_key:
        st.warning("β οΈ Please enter your Google Gemini API key in the sidebar!")
        return

    # Recipe generation interface
    input_col, button_col = st.columns([3, 1])
    with input_col:
        ingredients_input = st.text_input(
            "π₯ Enter your ingredients:",
            placeholder="e.g., chicken, rice, onion, garlic, tomato",
            help="Separate ingredients with commas"
        )
    with button_col:
        # Spacer so the button lines up with the text input.
        st.markdown("<br>", unsafe_allow_html=True)
        generate_button = st.button("π Generate Recipes", type="primary", use_container_width=True)

    # Advanced options
    with st.expander("π§ Advanced Options"):
        count_col, retrieval_col = st.columns(2)
        with count_col:
            num_recipes = st.slider("Number of recipes to generate:", 2, 6, 4)
        with retrieval_col:
            retrieval_k = st.slider("Similar recipes to consider:", 3, 10, 5)

    if not generate_button:
        return

    user_ingredients = [ing.strip() for ing in ingredients_input.split(',') if ing.strip()]
    if not user_ingredients:
        # Covers both an empty box and input made only of commas/whitespace.
        st.warning("Please enter at least one ingredient before generating recipes.")
        return

    with st.spinner("π€ AI is crafting personalized recipes..."):
        # RAG pipeline: retrieve similar recipes, then generate with them as context.
        relevant_recipes = rag_system.retrieve_relevant_recipes(user_ingredients, retrieval_k)
        generated_recipes = rag_system.generate_recipes_with_gemini(user_ingredients, relevant_recipes)

    if not generated_recipes:
        return

    st.markdown("## π½οΈ Your Personalized Recipes")

    # Show retrieval context (only the three best matches are listed).
    with st.expander("π Similar Recipes Found (RAG Context)"):
        for i, recipe in enumerate(relevant_recipes[:3], 1):
            st.write(f"**{i}. {recipe['name']}** ({recipe.get('cuisine', 'unknown')} cuisine)")
            st.write(f" Relevance: {recipe['relevance_score']:.2f} | Matching ingredients: {recipe['matching_ingredients']}")

    # NOTE(review): num_recipes only caps how many generated recipes are
    # displayed; it is not passed to the generation call above.
    for i, recipe in enumerate(generated_recipes[:num_recipes], 1):
        if not isinstance(recipe, dict):
            # Defensive: skip malformed entries from the model's answer.
            continue
        _render_recipe(i, recipe)
# Entry point: build the page only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()