Spaces:

Amarthya7
/

Image-Question-Answering-System

Runtime error

App Files Files Community

Image-Question-Answering-System / app.py

Amarthya7

Update app.py

b056a88 verified 8 months ago

raw

history blame

6.14 kB

	"""
	Visual Question Answering Streamlit Application
	"""

	import html
	import logging
	import os
	import sys
	import time
	from datetime import datetime

	import streamlit as st
	from PIL import Image

	# Directory containing this script; every path below is anchored to it.
	_APP_DIR = os.path.dirname(os.path.abspath(__file__))

	# Put the script's own directory on the import path (presumably so the local
	# ``models`` and ``utils`` packages imported below resolve regardless of the
	# working directory). Streamlit re-executes this script in the same process
	# on every interaction, so guard against appending a duplicate each rerun.
	if _APP_DIR not in sys.path:
	    sys.path.append(_APP_DIR)

	# Configure logging
	log_dir = os.path.join(_APP_DIR, "logs")
	os.makedirs(log_dir, exist_ok=True)
	log_file = os.path.join(
	    log_dir, f"vqa_app_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
	)

	# ``logging.basicConfig`` does nothing once the root logger has handlers, but
	# building ``FileHandler(log_file)`` eagerly would still create and open a
	# fresh, empty, timestamped log file on every rerun. Only construct the
	# handlers when they are actually going to be installed.
	if not logging.getLogger().handlers:
	    logging.basicConfig(
	        level=logging.INFO,
	        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	        handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
	    )
	logger = logging.getLogger("vqa_app")

	# Import project modules (after the import path has been extended above).
	from models import VQAInference
	from utils.image_utils import resize_image

	# Maps the label shown in the sidebar to the model identifier handed to
	# ``VQAInference``.
	MODEL_OPTIONS = {"BLIP": "blip", "ViLT": "vilt"}

	# Make sure the uploads directory exists.
	uploads_dir = os.path.join(_APP_DIR, "static", "uploads")
	os.makedirs(uploads_dir, exist_ok=True)

	# Configure page
	st.set_page_config(
	    page_title="Visual Question Answering",
	    page_icon="🔍",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)


	@st.cache_resource
	def _load_model_cached(model_name):
	    """Construct the ``VQAInference`` wrapper for *model_name*.

	    The result is kept in Streamlit's resource cache, keyed by *model_name*.
	    Exceptions are deliberately allowed to propagate: nothing is cached when
	    the call raises, so a failed load is attempted again on the next call.
	    """
	    logger.info(f"Loading model: {model_name}")
	    return VQAInference(model_name=model_name)


	def load_model(model_name):
	    """Return the cached VQA model for *model_name*, or ``None`` on failure.

	    Error handling lives outside the cached function on purpose. If the
	    ``try``/``except`` sat inside it, the ``None`` returned after a failure
	    would itself be cached and the model would never be retried.

	    Args:
	        model_name: Model identifier passed to ``VQAInference``.

	    Returns:
	        The ``VQAInference`` instance, or ``None`` if constructing it raised.
	        On failure the error is logged with its traceback and shown in the UI.
	    """
	    try:
	        return _load_model_cached(model_name)
	    except Exception as e:
	        logger.error(f"Error loading model: {str(e)}", exc_info=True)
	        st.error(f"Failed to load model: {str(e)}")
	        return None


	def process_image_and_question(image_file, question, model_name):
	    """Answer *question* about the image in *image_file* with *model_name*.

	    Args:
	        image_file: Anything ``PIL.Image.open`` accepts (here, the upload).
	        question: The question text forwarded to the model.
	        model_name: Model identifier handed to ``load_model``.

	    Returns:
	        A dict with the keys ``"answer"`` and ``"processing_time"`` (elapsed
	        wall-clock time formatted as ``"<x.xx> seconds"``), or ``None`` when
	        the model could not be loaded or any step raised.
	    """
	    started_at = time.time()

	    try:
	        # Decode the upload and force it to RGB.
	        rgb_image = Image.open(image_file).convert("RGB")
	        logger.info(f"Image loaded, size: {rgb_image.size}")

	        # Hand the project's resize helper the decoded image.
	        resized = resize_image(rgb_image)
	        logger.info(f"Image resized to: {resized.size}")

	        # A missing model has already been reported by ``load_model``.
	        vqa_model = load_model(model_name)
	        if vqa_model is None:
	            return None

	        logger.info(f"Generating answer for question: '{question}'")
	        answer = vqa_model.predict(resized, question)
	        logger.info(f"Answer generated: '{answer}'")

	        # The timing covers everything since entry, including model loading.
	        elapsed = time.time() - started_at
	        outcome = {"answer": answer}
	        outcome["processing_time"] = f"{elapsed:.2f} seconds"
	        return outcome
	    except Exception as e:
	        logger.error(f"Error processing request: {str(e)}", exc_info=True)
	        return None


	def main():
	    """Render the Streamlit page.

	    Layout: a sidebar for choosing the model, a left column with the image
	    uploader, question box and submit button, a right column showing the
	    answer (or a hint/warning), and a closing "about" section.

	    The question is stripped of surrounding whitespace, so a blank-only
	    entry is treated as "no question". The model's answer is HTML-escaped
	    before being placed in the styled ``<div>``, because that markup is
	    rendered with ``unsafe_allow_html=True``.
	    """
	    # Header
	    st.title("Visual Question Answering")
	    st.markdown("Upload an image, ask a question, and get AI-powered answers")

	    # Sidebar for model selection
	    st.sidebar.title("Model Options")
	    selected_model_name = st.sidebar.radio(
	        "Choose a model:", options=list(MODEL_OPTIONS), index=0
	    )
	    model_name = MODEL_OPTIONS[selected_model_name]

	    st.sidebar.markdown("---")
	    st.sidebar.markdown("## About the Models")
	    st.sidebar.markdown("BLIP: General purpose VQA with free-form answers")
	    st.sidebar.markdown("ViLT: Better for yes/no questions and specific categories")

	    # Main content - two columns
	    col1, col2 = st.columns([1, 1])

	    with col1:
	        st.markdown("### Upload & Ask")
	        uploaded_file = st.file_uploader(
	            "Upload an image:", type=["jpg", "jpeg", "png", "bmp", "gif"]
	        )

	        raw_question = st.text_input(
	            "Your question about the image:",
	            placeholder="E.g., What is in this image?",
	        )
	        # Whitespace-only input is not a question.
	        question = (raw_question or "").strip()

	        submit_button = st.button(
	            "Get Answer", type="primary", use_container_width=True
	        )

	        # Preview uploaded image
	        if uploaded_file is not None:
	            st.markdown("### Image Preview")
	            st.image(
	                uploaded_file, caption="Uploaded Image", use_container_width=True
	            )

	    with col2:
	        st.markdown("### AI Answer")

	        if not submit_button:
	            st.info("AI answers will appear here after you submit your question")
	        elif uploaded_file is None:
	            st.warning("Please upload an image first.")
	        elif not question:
	            st.warning("Please enter a question about the image.")
	        else:
	            with st.spinner("Generating answer..."):
	                result = process_image_and_question(
	                    uploaded_file, question, model_name
	                )

	            if result:
	                st.success("Answer generated successfully!")

	                # Display results
	                st.markdown("#### Question:")
	                st.write(question)

	                st.markdown("#### Answer:")
	                # The answer is model output rendered as raw HTML; escape it
	                # so that any markup in it is shown as text, not interpreted.
	                safe_answer = html.escape(str(result["answer"]))
	                st.markdown(
	                    "<div style='background-color: #f0f2f6; padding: 20px; "
	                    f"border-radius: 5px;'>{safe_answer}</div>",
	                    unsafe_allow_html=True,
	                )

	                st.markdown("#### Processing Time:")
	                st.text(result["processing_time"])
	            else:
	                st.error(
	                    "Failed to generate an answer. Please check the image and question, and try again."
	                )

	    # Information about the application
	    st.markdown("---")
	    st.markdown("### About Visual Question Answering")
	    st.markdown("""
	    This application uses multi-modal AI, combining computer vision and natural language processing
	    to answer questions about images. Here are some examples of questions you can ask:

	    - Objects: "What objects are in this image?"
	    - Counting: "How many people are in this image?"
	    - Colors: "What color is the car?"
	    - Actions: "What is the person doing?"
	    - Spatial relations: "What is to the left of the chair?"
	    - Attributes: "Is the cat sleeping?"
	    """)


	# Build the page only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    main()