|
""" |
|
Smart Food Image Generator - Gradio App |
|
Multimodal AI for Food Delivery Business |
|
|
|
Google Colab - https://colab.research.google.com/drive/1GKJUa7zI9Ei0IkgNzRlBEhijdILhdymc?usp=sharing |
|
|
|
This app generates food images and analyzes them for safety |
|
using Stable Diffusion and BLIP models. |
|
|
|
OVERVIEW: |
|
--------- |
|
This application combines text-to-image generation with visual question answering |
|
to create and analyze food images. It uses: |
|
- Stable Diffusion v1.5 for generating high-quality food images from text descriptions |
|
- BLIP VQA (Visual Question Answering) for analyzing food safety, ingredients, and allergens |
|
|
|
FEATURES: |
|
--------- |
|
1. AI-powered food image generation from text descriptions |
|
2. Automatic food analysis including allergen detection |
|
3. Upload and analyze your own food images |
|
4. Professional food photography style generation |
|
|
|
MODELS USED: |
|
------------ |
|
- runwayml/stable-diffusion-v1-5: Text-to-image generation |
|
- Salesforce/blip-vqa-base: Visual question answering for food analysis |
|
|
|
REQUIREMENTS: |
|
------------- |
|
- torch |
|
- gradio |
|
- diffusers |
|
- transformers |
|
- PIL (Pillow) |
|
|
|
HOW TO RUN: |
|
----------- |
|
1. Install dependencies: |
|
pip install torch gradio diffusers transformers pillow |
|
|
|
2. Run the application: |
|
python food_app.py |
|
|
|
3. Open your browser and navigate to: |
|
http://localhost:7860 |
|
|
|
4. Follow the app instructions: |
|
- Click "Load Models" first (required) |
|
- Generate food images with descriptions |
|
- Analyze food safety and allergens |
|
|
|
USAGE EXAMPLES: |
|
--------------- |
|
Food Descriptions: |
|
- "butter chicken with rice" |
|
- "chocolate chip cookies" |
|
- "grilled fish with vegetables" |
|
- "veg margherita pizza" |
|
|
|
The app will generate professional-looking food images and automatically |
|
analyze them for allergens, dietary restrictions, and safety information. |
|
""" |
|
|
|
import warnings |
|
|
|
import gradio as gr |
|
import torch |
|
from diffusers import StableDiffusionPipeline |
|
from PIL import Image |
|
from transformers import BlipForQuestionAnswering, BlipProcessor |
|
|
|
|
|
# Install a catch-all "ignore" filter: every warning category, from every
# module, is silenced for the lifetime of the process.
warnings.simplefilter("ignore")
|
|
|
|
|
class SmartFoodGenerator:
    """
    Generate food images with Stable Diffusion and inspect them with BLIP VQA.

    The public methods never raise: each one returns a status/error string
    (or a ``(image, message)`` tuple) so they can be wired directly to UI
    callbacks.

    Attributes:
        device (str): Computing device ('cuda', 'mps', or 'cpu')
        dtype (torch.dtype): Data type the diffusion pipeline is loaded with
        text2img_pipe: Stable Diffusion pipeline, ``None`` until loaded
        blip_model: BLIP VQA model, ``None`` until loaded
        blip_processor: BLIP processor, ``None`` until loaded
        models_loaded (bool): True once ``load_models`` has succeeded
    """

    # (label shown in the report, question put to the VQA model).
    # Keeping the label explicit avoids parsing it back out of the question.
    _ALLERGEN_QUESTIONS = (
        ("dairy or milk", "Does this contain dairy or milk?"),
        ("nuts", "Does this contain nuts?"),
        ("eggs", "Does this contain eggs?"),
        ("wheat or gluten", "Does this contain wheat or gluten?"),
    )

    def __init__(self):
        """Pick the compute device and create empty model placeholders."""
        self.device, self.dtype = self.setup_device()

        self.text2img_pipe = None
        self.blip_model = None
        self.blip_processor = None
        self.models_loaded = False

    def setup_device(self):
        """
        Choose the computing device and matching dtype.

        Priority order: CUDA GPU > Apple Silicon MPS > CPU.
        float16 is used on CUDA, float32 everywhere else.

        Returns:
            tuple: (device_name, torch_dtype)
        """
        if torch.cuda.is_available():
            return "cuda", torch.float16

        # Older torch builds have no ``torch.backends.mps`` attribute at all.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is not None and mps_backend.is_available():
            return "mps", torch.float32

        return "cpu", torch.float32

    def load_models(self):
        """
        Load Stable Diffusion v1.5 and the BLIP VQA model/processor.

        Calling this again after a successful load is a no-op.

        Returns:
            str: Status message indicating success or failure
        """
        if self.models_loaded:
            return "✅ Models already loaded!"

        try:
            print("📦 Loading models...")

            # NOTE: the pipeline is created with its safety checker disabled.
            self.text2img_pipe = StableDiffusionPipeline.from_pretrained(
                "runwayml/stable-diffusion-v1-5",
                torch_dtype=self.dtype,
                safety_checker=None,
                requires_safety_checker=False,
            )
            self.text2img_pipe = self.text2img_pipe.to(self.device)

            # The BLIP model is left on its default device; the processor
            # output in ``_run_vqa`` is not moved either, so the two agree.
            self.blip_model = BlipForQuestionAnswering.from_pretrained(
                "Salesforce/blip-vqa-base"
            )
            self.blip_processor = BlipProcessor.from_pretrained(
                "Salesforce/blip-vqa-base"
            )
            self.blip_model.eval()

            self.models_loaded = True
            return "✅ All models loaded successfully!"

        except Exception as e:
            return f"❌ Error loading models: {e}"

    def generate_food_image(self, food_description, seed=42):
        """
        Generate a food image from a text description.

        Args:
            food_description (str): Text description of the food to generate
            seed (int): Random seed for reproducible results (default: 42)

        Returns:
            tuple: (image or None, status_message)
        """
        if not self.models_loaded:
            return None, "❌ Models not loaded. Please load models first."

        # Treat None, "" and whitespace-only input alike.
        description = (food_description or "").strip()
        if not description:
            return None, "❌ Please provide a food description."

        try:
            print(f"🍽️ Generating: {description}")

            prompt = (
                f"{description}, professional food photography, "
                "appetizing, restaurant style"
            )

            # UI sliders may hand over a float; normalise once.
            seed = int(seed)
            torch.manual_seed(seed)
            if torch.cuda.is_available():
                torch.cuda.manual_seed(seed)

            with torch.no_grad():
                result = self.text2img_pipe(
                    prompt=prompt,
                    negative_prompt="blurry, low quality, unappetizing",
                    num_inference_steps=20,
                    guidance_scale=7.5,
                    height=512,
                    width=512,
                )

            return result.images[0], "✅ Food image generated successfully!"

        except Exception as e:
            return None, f"❌ Error generating image: {e}"

    def _run_vqa(self, image, question):
        """Run one BLIP VQA query and return the stripped answer; raises on failure."""
        inputs = self.blip_processor(image, question, return_tensors="pt")

        with torch.no_grad():
            out = self.blip_model.generate(
                **inputs,
                max_length=200,
                num_beams=5,
            )

        answer = self.blip_processor.decode(out[0], skip_special_tokens=True)
        return answer.strip()

    @staticmethod
    def _is_affirmative(answer):
        """Return True when *answer* contains the whole word "yes" (not e.g. "eyes")."""
        words = (word.strip(".,!?;:") for word in answer.lower().split())
        return "yes" in words

    def ask_about_food(self, image, question):
        """
        Ask a free-form question about a food image.

        Args:
            image: Food image to analyze
            question (str): Question to ask about the image

        Returns:
            str: Answer to the question or error message
        """
        if not self.models_loaded:
            return "❌ Models not loaded."

        try:
            return self._run_vqa(image, question)
        except Exception as e:
            return f"❌ Error: {e}"

    def analyze_food_safety(self, food_image):
        """
        Build a text report describing the food, its allergens and diet tags.

        The image is queried for a general description, four allergen groups
        (dairy, nuts, eggs, gluten), and whether it is vegetarian or spicy.
        If any query fails, the whole analysis is reported as failed rather
        than presenting an error as an answer or claiming that no allergens
        were detected.

        Args:
            food_image: Food image to analyze

        Returns:
            str: Formatted analysis results or error message
        """
        if not self.models_loaded:
            return "❌ Models not loaded."

        if food_image is None:
            return "❌ No image provided."

        try:
            print("🔬 Analyzing food ...")

            # Uses the raising helper so failures reach the except below.
            description = self._run_vqa(food_image, "Describe the food")

            allergens = [
                label
                for label, question in self._ALLERGEN_QUESTIONS
                if self._is_affirmative(self._run_vqa(food_image, question))
            ]

            vegetarian = self._run_vqa(food_image, "Is this vegetarian?")
            spicy = self._run_vqa(food_image, "Is this spicy?")

            allergen_summary = ", ".join(allergens) if allergens else "None detected"
            sections = [
                "🔬 FOOD SAFETY ANALYSIS",
                f"📝 Description: {description}",
                f"⚠️ Allergens: {allergen_summary}",
                f"🥬 Vegetarian: {vegetarian}",
                f"🌶️ Spicy: {spicy}",
            ]
            return "\n\n".join(sections)

        except Exception as e:
            return f"❌ Error analyzing food: {e}"

    def generate_and_analyze_food(self, food_description, seed=42):
        """
        Generate a food image and immediately analyze it.

        Args:
            food_description (str): Text description of food to generate
            seed (int): Random seed for reproducible image generation

        Returns:
            tuple: (image or None, analysis_text or error_message)
        """
        if not self.models_loaded:
            return None, "❌ Models not loaded. Please load models first."

        if not food_description or not food_description.strip():
            return None, "❌ Please provide a food description."

        try:
            print(f"🚀 Complete pipeline for: {food_description}")

            food_image, gen_status = self.generate_food_image(food_description, seed)

            # Generation failed: pass its status message through unchanged.
            if food_image is None:
                return None, gen_status

            return food_image, self.analyze_food_safety(food_image)

        except Exception as e:
            return None, f"❌ Error in pipeline: {e}"
|
|
|
|
|
|
|
|
|
# Shared generator instance used by the interface wrapper functions in this
# module. Constructing it only selects the device/dtype; the models themselves
# are loaded later through load_models().
food_generator: SmartFoodGenerator = SmartFoodGenerator()
|
|
|
|
|
|
|
|
|
|
|
def load_models_interface():
    """
    Load the AI models through the shared generator (Gradio callback).

    Returns:
        str: Status message produced by the model loading step
    """
    load_status_message = food_generator.load_models()
    return load_status_message
|
|
|
|
|
def generate_food_interface(food_description, seed):
    """
    Generate a food image and its analysis (Gradio callback).

    Args:
        food_description (str): Food description entered by the user
        seed (int): Random seed value chosen by the user

    Returns:
        tuple: (generated_image, analysis_text) for the Gradio outputs
    """
    outcome = food_generator.generate_and_analyze_food(food_description, seed)
    picture, report = outcome
    return picture, report
|
|
|
|
|
def analyze_uploaded_food(image):
    """
    Analyze a user-supplied food image (Gradio callback).

    Args:
        image: Image value delivered by the Gradio image component (its
            concrete type depends on how the component is configured), or
            None when nothing was uploaded

    Returns:
        str: Food safety analysis results or error message
    """
    # Guard here so the user gets an upload hint instead of a generic error.
    if image is None:
        return "❌ Please upload an image."

    return food_generator.analyze_food_safety(image)
|
|
|
|
|
def ask_question_interface(image, question):
    """
    Answer a free-form question about a food image (Gradio callback).

    Args:
        image: Image value delivered by the Gradio image component, or None
            when nothing was uploaded
        question (str): Question text entered by the user

    Returns:
        str: Answer to the question or error message
    """
    if image is None:
        return "❌ Please upload an image."

    # Empty and whitespace-only questions are both treated as missing.
    if not question or not question.strip():
        return "❌ Please enter a question."

    return food_generator.ask_about_food(image, question)
|
|
|
|
|
|
|
|
|
|
|
with gr.Blocks(title="Smart Food Image Generator") as app:
    # Page header and quick-start instructions.
    gr.Markdown(
        """
        # 🍽️ Smart Food Image Generator

        **Multimodal AI for Food Delivery Business**

        This app generates professional food images from text descriptions and analyzes them for safety,
        ingredients, and allergens using Stable Diffusion and BLIP models.

        ## 🚀 How to use:
        1. **Load Models** - Click to load the AI models (required first step)
        2. **Generate Food** - Enter a food description to generate and analyze images
        3. **Analyze Food** - Upload your own food images for safety analysis
        """
    )

    # Step 1: model loading. The other callbacks return a "models not loaded"
    # message until this has succeeded.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📦 Step 1: Load Models")

            load_btn = gr.Button("🚀 Load Models", variant="primary", size="lg")
            load_status = gr.Textbox(label="Status", interactive=False)

            load_btn.click(
                fn=load_models_interface,
                outputs=load_status,
            )

    gr.Markdown("---")

    # Step 2: text-to-image generation with automatic analysis.
    with gr.Row():
        # Left column: user inputs.
        with gr.Column(scale=1):
            gr.Markdown("### 🍽️ Step 2: Generate Food Images")

            food_input = gr.Textbox(
                label="Food Description",
                placeholder="e.g., butter chicken with rice, chocolate chip cookies, grilled salmon with vegetables",
                lines=2,
            )

            seed_input = gr.Slider(
                label="Seed (for reproducible results)",
                minimum=1,
                maximum=1000,
                value=42,
                step=1,
            )

            generate_btn = gr.Button("🎨 Generate & Analyze Food", variant="primary")

        # Right column: generated image and its analysis.
        with gr.Column(scale=1):
            generated_image = gr.Image(label="Generated Food Image", height=400)

            analysis_output = gr.Textbox(
                label="Food Analysis", lines=10, interactive=False
            )

    generate_btn.click(
        fn=generate_food_interface,
        inputs=[food_input, seed_input],
        outputs=[generated_image, analysis_output],
    )

    gr.Markdown("---")

    # Step 3: analysis of a user-supplied image.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 🔬 Step 3: Analyze Your Food Images")

            # type="pil" so the analysis callback receives a PIL image, as its
            # docstring states, instead of the component's default value type.
            uploaded_image = gr.Image(
                label="Upload Food Image", height=400, type="pil"
            )

            analyze_btn = gr.Button("🔍 Analyze Food", variant="secondary")

        with gr.Column(scale=1):
            uploaded_analysis = gr.Textbox(
                label="Analysis Results",
                lines=10,
                interactive=False,
            )

    analyze_btn.click(
        fn=analyze_uploaded_food,
        inputs=uploaded_image,
        outputs=uploaded_analysis,
    )

    # Footer with sample prompts.
    gr.Markdown(
        """
        ---
        ### 📝 Example Food Descriptions:
        - veg noodles
        - chilli garlic veg noodles
        - chicken noodles
        - schezwan chicken noodles
        - grilled fish with vegetables
        - veg margherita pizza
        - chicken caesar salad
        - veg stir fry
        - chocolate cake with strawberries

        """
    )
|
|
|
if __name__ == "__main__":
    # Launch the Gradio app only when this file is executed as a script.
    #
    #   server_name "0.0.0.0"  accept connections on every network interface
    #   server_port 7860       port the app is served on
    #   share True             request a public Gradio share link
    #   debug True             run in debug mode
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "debug": True,
    }
    app.launch(**launch_options)
|
|