Scratch_Vision_Game_dup / app2_BLIP.py
prthm11's picture
Upload 11 files
1c58aa5 verified
raw
history blame
4.84 kB
import json
import io
import base64
import os, re
from langchain_google_vertexai.vision_models import VertexAIVisualQnAChat
from PIL import Image
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from groq import Groq
from flask import Flask, jsonify
from langgraph.prebuilt import create_react_agent
from langchain_community.llms import huggingface_pipeline
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from langchain_core.prompts import PromptTemplate
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
# os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
# groq_api_key = os.getenv("GROQ_API_KEY")

# Flask application object that the route below registers itself on.
app = Flask(__name__)

# Relative path of the single image this service analyses.
static_image_path = os.path.join("images", "page2_print.jfif")

# llm = ChatGroq(
#     model="meta-llama/llama-4-maverick-17b-128e-instruct",
#     temperature=0,
#     max_tokens=None,
# )

# BLIP captioning checkpoint: processor and model are loaded once at import
# time, and the model is placed on the CPU.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT).to("cpu")
def analyze_with_blip(image_pil):
    """Caption an image with the module-level BLIP processor and model.

    Args:
        image_pil: The image handed to the BLIP processor (presumably a
            ``PIL.Image``, as the parameter name suggests).

    Returns:
        The decoded caption for the first generated sequence, with special
        tokens removed.
    """
    # Preprocess into PyTorch tensors and keep them on the CPU, like the model.
    encoded = processor(image_pil, return_tensors="pt").to("cpu")
    # Generation is capped at 100 newly produced tokens.
    generated_ids = model.generate(**encoded, max_new_tokens=100)
    return processor.decode(generated_ids[0], skip_special_tokens=True)
# System prompt telling the model to answer with sprite/backdrop JSON only.
_SYSTEM_PROMPT = """
You are an expert in visual scene understanding.
Your Job is to analyze an image and respond with structured json like This :
- Any number of "Sprites": These refer to distinct characters, animals, or objects in the image that are **in front of the background** (e.g., cat, ball, crab, person, etc.).
{
"Sprite 1": {
"name": "Cat",
"description":"An orange cartoon cat with a cheerful expression, shown jumping playfully."
},
"Backdrop":{
"name":"Beach Scene",
"description":"A serene beach with sand, blue water, and a clear sky."
}
}
Guidelines:
- Focus only the images given in Square Shape.
- Don't Consider Blank areas in Image as "Backdrop".
- Do NOT classify the background scene as a sprite.
- All characters or objects placed in the foreground should be "Sprites".
- Use 'Sprite 1', 'Sprite 2', etc. for character or figures.
- Use 'Backdrop' for environmental setting or Background behind Sprite.
- Don't include generic summary or explanation outside the fields.
Return only valid JSON.
"""

# File the most recent detection result is written to.
_RESULT_FILE = "detected_image_info.json"


def _get_llm():
    """Return the chat model that backs the agent.

    A module-level ``llm`` is reused when one has been configured; otherwise
    a Groq chat model is created with the settings this file documents for it.
    Referencing a bare ``llm`` here would raise ``NameError`` because the
    module does not define one.
    """
    configured = globals().get("llm")
    if configured is not None:
        return configured
    return ChatGroq(
        model="meta-llama/llama-4-maverick-17b-128e-instruct",
        temperature=0,
        max_tokens=None,
    )


def _parse_model_json(raw_text):
    """Strip an optional Markdown code fence and decode the reply as JSON.

    Returns the decoded object, or the cleaned string when the reply is not
    valid JSON (best-effort fallback so the caller can still return it).
    """
    # Accept both a fenced block tagged "json" and an untagged fence.
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw_text.strip())
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as exc:
        print("JSON parsing error:", exc)
        return cleaned


@app.route("/", methods=["GET"])
def analyze_static_image():
    """Describe the sprites and backdrop of the configured static image.

    Reads ``static_image_path``, sends it as a base64 data URL to a tool-less
    ReAct agent primed with ``_SYSTEM_PROMPT``, parses the reply as JSON,
    writes the result to ``detected_image_info.json`` and returns it.

    Returns:
        A JSON response with ``image_path`` and ``detected_info`` on success;
        ``({"error": ...}, 404)`` when the image file is missing; or
        ``({"error": ...}, 500)`` when building or calling the model fails.
    """
    # Use the one configured path for both the existence check and the read;
    # a separate hard-coded backslash path breaks on non-Windows systems.
    image_path = static_image_path
    try:
        with open(image_path, "rb") as image_file:
            img_base64 = base64.b64encode(image_file.read()).decode("utf-8")
    except FileNotFoundError:
        return jsonify({"error": "Image not found"}), 404

    # Multimodal user message: the instruction text plus the inline image.
    content = [
        {
            "type": "text",
            "text": "Analyze the image and describe the backdrops and characters as per instruction.",
        },
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
        },
    ]

    # Model construction lives inside the try so that configuration problems
    # (e.g. a missing API key) are reported as JSON 500s like call failures.
    try:
        agent = create_react_agent(
            model=_get_llm(),
            tools=[],
            prompt=_SYSTEM_PROMPT,
        )
        response = agent.invoke({"messages": [{"role": "user", "content": content}]})
        raw_response = response["messages"][-1].content
        detected_info = _parse_model_json(raw_response)
    except Exception as e:  # boundary handler: surface any failure to the client
        return jsonify({"error": str(e)}), 500

    result = {
        "image_path": image_path,
        "detected_info": detected_info,
    }

    # Persist the latest result next to the app for offline inspection.
    with open(_RESULT_FILE, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=4)
    print("Detection results saved to detected_image_info.json")

    return jsonify(result)
if __name__ == "__main__":
    # Executed as a script: start Flask's built-in server with debug mode on.
    app.run(debug=True)