# NOTE(review): the three lines below were Hugging Face Space page-header text
# ("Spaces: / Sleeping / Sleeping") captured by the scrape; kept as a comment
# so the module parses.
import os

import requests
import torch
from dotenv import load_dotenv
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM

from image_utils import UrlTest

# NOTE(review): os, requests, and load_dotenv are imported but never used in
# this file — presumably needed by other modules or leftovers; retained to
# avoid breaking anything outside this view.

# Shared helper that resolves an image path/URL into a loadable image
# (see image_utils.UrlTest — assumed to return a PIL image; confirm).
img = UrlTest()
class ImageCaptioning:
    """Caption an image with BLIP, then derive topic ideas with FLAN-T5."""

    def __init__(self):
        # Initialize models and tokenizers (downloads weights on first run).
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.blip_model = BlipForConditionalGeneration.from_pretrained('Salesforce/blip-image-captioning-base')
        self.topic_generator_processor = AutoTokenizer.from_pretrained("google/flan-t5-large")
        self.topic_generator_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
        # Inference only: switch off dropout / training-mode layers.
        self.blip_model.eval()
        self.topic_generator_model.eval()

    def generate_caption(self, image):
        """Return a list of decoded caption strings for *image*.

        *image* is whatever BlipProcessor accepts (e.g. a PIL image).
        Sampling is enabled, so output varies between calls.
        """
        inputs = self.blip_processor(image, return_tensors="pt")
        # no_grad: generation is inference-only; skip autograd bookkeeping.
        with torch.no_grad():
            outputs = self.blip_model.generate(
                pixel_values=inputs["pixel_values"],
                max_new_tokens=128,
                do_sample=True,
                temperature=0.9,
                top_k=50,
                top_p=0.95,
            )
        # BUG FIX: the original decoded the captions into caption_output and
        # then returned the raw token tensors, forcing callers to decode again.
        # Return the decoded strings directly.
        return [self.blip_processor.decode(output, skip_special_tokens=True) for output in outputs]

    def generate_topics(self, user_input, num_topics=3):
        """Generate *num_topics* short topic ideas for *user_input*.

        Uses a few-shot prompt (topic title -> example sentence pairs) and
        returns a list of decoded strings; empties/duplicates are possible.
        """
        query = f"""Generate a topic sentence idea based on the user input.
        The generated topics should portray the context or idea behind the given sentences or phrase.
        For Instance,
        - "Grocery Shopping" OR "Grocery List" OR "Shopping List": "I'm going grocery shopping tomorrow,
        and I would like to get the following things on my grocery list: Milk, Soybeans, Cowpeas,
        Saturated Water, Onions, Tomatoes, etc."
        - "Studying For Exams" OR "Exams Studies": "Exams aare coming up and I have to prepare for the core
        courses. I'll be studying for Control Systems, Software Engineering and Circuit Theory."
        - "Healthy Breakfast": "To prepare a healthy breakfast, I need the appropriate combination of balanced
        diet. I'll need oats, yogurt, fresh berries, honey and smoothies."
        - "Fitness Routine": "Starting a fitness routine involves workout clothes, running shoes,
        a water bottles, and a gym membership. With this, I can start a proper fitness plan."
        - "Summer Vacation": "Packing swimsuits and enjoy the view of the ocean."
        - "Coffee Break": "Sipping Coffee at the table."
        - "Relaxation": "Sitting at the table enjoying."
        This is what I'm expecting the model to do. Here is the input: {user_input}
        """
        caption_input = self.topic_generator_processor(query, return_tensors="pt", padding=True, truncation=True, max_length=512)
        # NOTE(review): do_sample with temperature=0.1 AND num_beams=5 is an
        # unusual mix (sampled beam search); kept as-is to preserve behavior.
        with torch.no_grad():
            caption_output = self.topic_generator_model.generate(
                **caption_input,
                temperature=0.1,
                num_return_sequences=num_topics,
                do_sample=True,
                max_length=50,
                top_k=50,
                top_p=0.95,
                num_beams=5,
            )
        return [self.topic_generator_processor.decode(output, skip_special_tokens=True) for output in caption_output]

    def combo_model(self, image):
        """Load *image* (path or URL), caption it, and derive topic ideas.

        Returns {"caption": str, "topics": list[str]} with empty topics dropped.
        """
        image = img.load_image(image)
        # generate_caption now returns decoded strings; take the first one.
        caption = self.generate_caption(image)[0]
        topics = [topic for topic in self.generate_topics(caption) if topic]
        return {"caption": caption, "topics": topics}
if __name__ == "__main__":
    # Initialize model (downloads/loads BLIP and FLAN-T5 weights).
    model = ImageCaptioning()
    # Test image — expected to exist in the working directory.
    image = "1071642.jpg"
    # Generate caption and topics, then print the combined dict.
    outputs = model.combo_model(image)
    print(outputs)