Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files- BharatCaptioner.py +148 -0
- LICENSE.txt +21 -0
- __pycache__/bharatcaptioner_demo.cpython-312.pyc +0 -0
- app.py +183 -0
- requirements.txt +12 -0
BharatCaptioner.py
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

import gdown
import tensorflow as tf

# Class labels in the index order the classifier was trained with; the
# model's argmax output indexes directly into this list.
class_labels = ['The Agra Fort', 'Ajanta Caves', 'Alai Darwaza', 'Amarnath Temple', 'The Amber Fort', 'Basilica of Bom Jesus', 'Brihadisvara Temple', 'Charar-e-Sharief shrine', 'Charminar', 'Chhatrapati Shivaji Terminus', 'Chota Imambara', 'Dal Lake', 'The Elephanta Caves', 'Ellora Caves', 'Fatehpur Sikri', 'Gateway of India', 'Ghats in Varanasi', 'Gol Gumbaz', 'Golden Temple', 'Group of Monuments at Mahabalipuram', 'Hampi', 'Hawa Mahal', "Humayun's Tomb", 'The India gate', 'Iron Pillar', 'Jagannath Temple, Puri', 'Jageshwar', 'Jama Masjid', 'Jamali Kamali Tomb', 'Jantar Mantar, Jaipur', 'Jantar Mantar, New Delhi', 'Kedarnath Temple', 'Khajuraho Temple', 'Konark Sun Temple', 'Mahabodhi Temple', 'Meenakshi Temple', 'Nalanda mahavihara', 'Parliament House, New Delhi', 'Qutb Minar', 'Qutb Minar Complex', 'Ram Mandir', 'Rani ki Vav', 'Rashtrapati Bhavan', 'The Red Fort', 'Sanchi', 'Supreme Court of India', 'Swaminarayan Akshardham (Delhi)', 'Taj Hotels', 'The Lotus Temple', 'The Mysore Palace', 'The Statue of Unity', 'The Taj Mahal', 'Vaishno Devi Temple', 'Venkateswara Temple, Tirumala', 'Victoria Memorial, Kolkata', 'Vivekananda Rock Memorial']

# URL with the file ID
#v3 - https://drive.google.com/file/d/1psuUF80mDO3EZkx2Rl9F5EkqbVHSxFqI/view?usp=sharing
#v4 - https://drive.google.com/file/d/10weM6bZ9R8xVl0KxKig9WRzss4OpIa7f/view?usp=sharing
#v5 - https://drive.google.com/file/d/1PXixJsrUaVcHEEC-jDlv4tHT2qrCrf5c/view?usp=sharing
file_id = '1PXixJsrUaVcHEEC-jDlv4tHT2qrCrf5c'  # Replace with your file ID
url = f'https://drive.google.com/uc?id={file_id}'

# Output path to save the model
output = 'LandmarkClassifierV5.h5'  # Replace with your model file name

# Download the weights only once; skip if already cached on disk.
if not os.path.exists(output):
    gdown.download(url, output, quiet=False)


def load_model():
    """Load the downloaded Keras landmark classifier from disk and return it."""
    return tf.keras.models.load_model(output)


model = load_model()

# Print model summary to verify
model.summary()

# Imports used by the prediction helper below.  requests / PIL / BytesIO /
# matplotlib are kept because the commented-out URL/path helpers later in
# this file rely on them.
from tensorflow.keras.preprocessing import image
import numpy as np
import requests
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
def identify_landmark(img):
    """Classify the Indian landmark shown in a PIL image.

    Parameters
    ----------
    img : PIL.Image.Image
        Input image; any size or mode (resized/converted internally).

    Returns
    -------
    tuple
        (predicted class label from ``class_labels``, probability of that class).
    """
    # Normalise the mode first: PNG uploads are often RGBA or greyscale,
    # which would not produce the 3-channel array this pipeline implies
    # (assumes the model expects RGB input -- TODO confirm against training).
    img = img.convert("RGB")

    # Preprocess the image: resize to the network input size and scale
    # pixels to [0, 1] to match the /255 preprocessing used below.
    img = img.resize((224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array /= 255.0

    # Get predictions
    predictions = model.predict(img_array)

    # Get the index of the class with the highest probability
    predicted_class_index = np.argmax(predictions[0])

    # Get the probability of the predicted class
    predicted_probability = predictions[0][predicted_class_index]

    # Map the predicted class index to the class label
    predicted_class_label = class_labels[predicted_class_index]

    return predicted_class_label, predicted_probability
|
67 |
+
# def identify_landmark(img):
|
68 |
+
# img1 = img.copy()
|
69 |
+
|
70 |
+
# # Preprocess the image
|
71 |
+
# img = img.resize((224, 224))
|
72 |
+
# img_array = image.img_to_array(img)
|
73 |
+
# img_array = np.expand_dims(img_array, axis=0)
|
74 |
+
# img_array /= 255.0
|
75 |
+
|
76 |
+
# # Get predictions
|
77 |
+
# predictions = model.predict(img_array)
|
78 |
+
|
79 |
+
# # Get the index of the class with the highest probability
|
80 |
+
# predicted_class_index = np.argmax(predictions[0])
|
81 |
+
|
82 |
+
# # Map the predicted class index to the class label
|
83 |
+
# return class_labels[predicted_class_index]
|
84 |
+
|
85 |
+
|
86 |
+
# def generate_landmark_path(img_path):
|
87 |
+
# img = image.load_img(img_path, target_size=(224, 224))
|
88 |
+
# img1 = img.copy()
|
89 |
+
|
90 |
+
# # Preprocess the image
|
91 |
+
# img = img.resize((224, 224))
|
92 |
+
# img_array = image.img_to_array(img)
|
93 |
+
# img_array = np.expand_dims(img_array, axis=0)
|
94 |
+
# img_array /= 255.0
|
95 |
+
|
96 |
+
# # Get predictions
|
97 |
+
# predictions = model.predict(img_array)
|
98 |
+
|
99 |
+
# # Get the index of the class with the highest probability
|
100 |
+
# predicted_class_index = np.argmax(predictions[0])
|
101 |
+
|
102 |
+
# # Map the predicted class index to the class label
|
103 |
+
# plt.imshow(img1)
|
104 |
+
# plt.axis("off")
|
105 |
+
# plt.title(class_labels[predicted_class_index])
|
106 |
+
# plt.show()
|
107 |
+
# return wikipedia.summary(class_labels[predicted_class_index])
|
108 |
+
|
109 |
+
|
110 |
+
# def generate_landmark_url(img_url):
|
111 |
+
# try:
|
112 |
+
# # Download the image from the URL
|
113 |
+
# response = requests.get(img_url)
|
114 |
+
# response.raise_for_status() # Check if the request was successful
|
115 |
+
|
116 |
+
# # Open the image
|
117 |
+
# img = Image.open(BytesIO(response.content))
|
118 |
+
# img1 = img.copy()
|
119 |
+
|
120 |
+
# # Preprocess the image
|
121 |
+
# img = img.resize((224, 224))
|
122 |
+
# img_array = image.img_to_array(img)
|
123 |
+
# img_array = np.expand_dims(img_array, axis=0)
|
124 |
+
# img_array /= 255.0
|
125 |
+
|
126 |
+
# # Get predictions
|
127 |
+
# predictions = model.predict(img_array)
|
128 |
+
|
129 |
+
# # Get the index of the class with the highest probability
|
130 |
+
# predicted_class_index = np.argmax(predictions[0])
|
131 |
+
|
132 |
+
# # Map the predicted class index to the class label
|
133 |
+
# plt.imshow(img1)
|
134 |
+
# plt.axis("off")
|
135 |
+
# plt.title(class_labels[predicted_class_index])
|
136 |
+
# plt.show()
|
137 |
+
|
138 |
+
# return wikipedia.summary(class_labels[predicted_class_index])
|
139 |
+
|
140 |
+
# except requests.exceptions.RequestException as e:
|
141 |
+
# print(f"Error downloading the image: {e}")
|
142 |
+
# return "Invalid image URL."
|
143 |
+
# except IOError as e:
|
144 |
+
# print(f"Error opening the image: {e}")
|
145 |
+
# return "Invalid image file."
|
146 |
+
# except Exception as e:
|
147 |
+
# print(f"An error occurred: {e}")
|
148 |
+
# return "An error occurred while processing the image."
|
LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2024 Harshal
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
__pycache__/bharatcaptioner_demo.cpython-312.pyc
ADDED
Binary file (4.77 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import json
import streamlit as st
from PIL import Image, UnidentifiedImageError, ExifTags
import requests
from io import BytesIO
import wikipedia
from easygoogletranslate import EasyGoogleTranslate
from BharatCaptioner import identify_landmark
from groq import Groq
import hashlib

# Initialize EasyGoogleTranslate
translator = EasyGoogleTranslate(source_language="en", target_language="hi", timeout=10)

# Load configuration for Groq API key.  Use a context manager so the
# config file handle is closed promptly instead of leaking.
working_dir = os.path.dirname(os.path.abspath(__file__))
with open(f"{working_dir}/config.json") as _config_file:
    config_data = json.load(_config_file)
GROQ_API_KEY = config_data["GROQ_API_KEY"]
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

client = Groq()

# Title of the Streamlit app
st.title("BharatCaptioner with Conversational Chatbot")
st.write(
    "A tool to identify/describe Indian Landmarks in Indic Languages and chat about the image."
)

# Sidebar details
st.sidebar.title("Developed by Harshal and Harsh Pandey")
st.sidebar.write(
    "**For the Model that I trained**: [Mail me here](mailto:[email protected])"
)
st.sidebar.write(
    "**For the Code**: [GitHub Repo](https://github.com/justharshal2023/BharatCaptioner)"
)
st.sidebar.write(
    "**Connect with me**: [LinkedIn](https://www.linkedin.com/in/harshal-123a90250/)"
)

# Image upload or URL input
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
url = st.text_input("Or enter a valid image URL...")

# Shared state for the rest of the script; populated once an image loads.
image = None
error_message = None
landmark = None
summary = None
caption = None
# Function to correct image orientation
def correct_image_orientation(img):
    """Rotate *img* upright according to its EXIF Orientation tag, if present."""
    try:
        # Resolve the numeric EXIF tag id that corresponds to "Orientation".
        orientation_tag = next(
            tag for tag, tag_name in ExifTags.TAGS.items() if tag_name == "Orientation"
        )
        exif = img._getexif()
        if exif is not None:
            # EXIF orientation values 3 / 6 / 8 encode 180 / 270 / 90 degree
            # counter-clockwise rotations; anything else needs no correction.
            degrees = {3: 180, 6: 270, 8: 90}.get(exif[orientation_tag])
            if degrees is not None:
                img = img.rotate(degrees, expand=True)
    except (AttributeError, KeyError, IndexError):
        # No EXIF data (or no orientation entry) -- return the image as-is.
        pass
    return img
|
72 |
+
|
73 |
+
# Function to get a unique hash for the image
|
74 |
+
def get_image_hash(image):
|
75 |
+
img_bytes = image.tobytes()
|
76 |
+
return hashlib.md5(img_bytes).hexdigest()
|
77 |
+
|
78 |
+
|
79 |
+
# Check if new image or URL is uploaded and reset the chat history
|
80 |
+
def reset_chat_if_new_image():
|
81 |
+
if "last_uploaded_hash" not in st.session_state:
|
82 |
+
st.session_state["last_uploaded_hash"] = None
|
83 |
+
|
84 |
+
# Process the new image or URL
|
85 |
+
if uploaded_file:
|
86 |
+
image = Image.open(uploaded_file)
|
87 |
+
image = correct_image_orientation(image)
|
88 |
+
new_image_hash = get_image_hash(image)
|
89 |
+
elif url:
|
90 |
+
try:
|
91 |
+
response = requests.get(url)
|
92 |
+
response.raise_for_status()
|
93 |
+
image = Image.open(BytesIO(response.content))
|
94 |
+
image = correct_image_orientation(image)
|
95 |
+
new_image_hash = get_image_hash(image)
|
96 |
+
except (requests.exceptions.RequestException, UnidentifiedImageError):
|
97 |
+
image = None
|
98 |
+
new_image_hash = None
|
99 |
+
error_message = (
|
100 |
+
"Error: The provided URL is invalid or the image could not be loaded."
|
101 |
+
)
|
102 |
+
st.error(error_message)
|
103 |
+
else:
|
104 |
+
image = None
|
105 |
+
new_image_hash = None
|
106 |
+
|
107 |
+
# If the image is new, reset the chat and session state
|
108 |
+
if new_image_hash and new_image_hash != st.session_state["last_uploaded_hash"]:
|
109 |
+
st.session_state.clear()
|
110 |
+
st.session_state["last_uploaded_hash"] = new_image_hash
|
111 |
+
st.experimental_rerun()
|
112 |
+
|
113 |
+
return image
|
114 |
+
|
115 |
+
|
116 |
+
# Call the reset function to check for new images or URL
|
117 |
+
image = reset_chat_if_new_image()
|
118 |
+
|
119 |
+
# If an image is provided
|
120 |
+
if image is not None:
|
121 |
+
# Resize image for processing
|
122 |
+
image = image.resize((256, 256))
|
123 |
+
|
124 |
+
# Identify the landmark using BharatCaptioner
|
125 |
+
landmark, prob = identify_landmark(image)
|
126 |
+
summary = wikipedia.summary(landmark, sentences=3) # Shortened summary
|
127 |
+
st.write(f"**Landmark Identified:** {landmark} (Confidence: {prob:.2f})")
|
128 |
+
|
129 |
+
# Display image and landmark name in the sidebar
|
130 |
+
with st.sidebar:
|
131 |
+
st.image(image, caption="Current Image", use_column_width=True)
|
132 |
+
st.write(f"**Landmark:** {landmark}")
|
133 |
+
|
134 |
+
# Chatbot functionality
|
135 |
+
st.write("### Chat with the Chatbot about the Image")
|
136 |
+
caption = f"The landmark in the image is {landmark}. {summary}"
|
137 |
+
|
138 |
+
# Initialize chat history in session state if not present
|
139 |
+
if "chat_history" not in st.session_state:
|
140 |
+
st.session_state["chat_history"] = []
|
141 |
+
|
142 |
+
# Chatbot introduction message with bold text for landmark and question
|
143 |
+
if not st.session_state.get("chatbot_started"):
|
144 |
+
chatbot_intro = f"Hello! I see the image is of **{landmark}**. {summary} **Would you like to know more** about this landmark?"
|
145 |
+
st.session_state["chat_history"].append(
|
146 |
+
{"role": "assistant", "content": chatbot_intro}
|
147 |
+
)
|
148 |
+
st.session_state["chatbot_started"] = True
|
149 |
+
|
150 |
+
# Display chat history
|
151 |
+
for message in st.session_state.chat_history:
|
152 |
+
with st.chat_message(message["role"]):
|
153 |
+
st.markdown(message["content"])
|
154 |
+
|
155 |
+
# User input
|
156 |
+
user_prompt = st.chat_input("Ask the Chatbot about the image...")
|
157 |
+
|
158 |
+
if user_prompt:
|
159 |
+
st.chat_message("user").markdown(user_prompt)
|
160 |
+
st.session_state.chat_history.append({"role": "user", "content": user_prompt})
|
161 |
+
|
162 |
+
# Send the user's message to the LLaMA chatbot
|
163 |
+
messages = [
|
164 |
+
{
|
165 |
+
"role": "system",
|
166 |
+
"content": "You are a helpful image conversational assistant. "
|
167 |
+
+ f"The caption of the image is: {caption}",
|
168 |
+
},
|
169 |
+
*st.session_state.chat_history,
|
170 |
+
]
|
171 |
+
|
172 |
+
response = client.chat.completions.create(
|
173 |
+
model="llama-3.1-8b-instant", messages=messages
|
174 |
+
)
|
175 |
+
|
176 |
+
assistant_response = response.choices[0].message.content
|
177 |
+
st.session_state.chat_history.append(
|
178 |
+
{"role": "assistant", "content": assistant_response}
|
179 |
+
)
|
180 |
+
|
181 |
+
# Display chatbot response
|
182 |
+
with st.chat_message("assistant"):
|
183 |
+
st.markdown(assistant_response)
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit>=1.24,<1.37
|
2 |
+
easygoogletranslate
|
3 |
+
wikipedia
|
4 |
+
pillow
|
5 |
+
requests
|
6 |
+
matplotlib
|
7 |
+
tensorflow
|
8 |
+
gdown
|
9 |
+
transformers
|
10 |
+
torch
|
11 |
+
tf-keras
|
12 |
+
groq
|