Harsh1306 committed on
Commit 06b1271 · verified · 1 Parent(s): acaf72c

Upload 5 files

BharatCaptioner.py ADDED
@@ -0,0 +1,146 @@
+ import os
+ from io import BytesIO
+
+ import gdown
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import requests
+ import tensorflow as tf
+ from PIL import Image
+ from tensorflow.keras.preprocessing import image
+
+ class_labels = ['The Agra Fort', 'Ajanta Caves', 'Alai Darwaza', 'Amarnath Temple', 'The Amber Fort', 'Basilica of Bom Jesus', 'Brihadisvara Temple', 'Charar-e-Sharief shrine', 'Charminar', 'Chhatrapati Shivaji Terminus', 'Chota Imambara', 'Dal Lake', 'The Elephanta Caves', 'Ellora Caves', 'Fatehpur Sikri', 'Gateway of India', 'Ghats in Varanasi', 'Gol Gumbaz', 'Golden Temple', 'Group of Monuments at Mahabalipuram', 'Hampi', 'Hawa Mahal', "Humayun's Tomb", 'The India gate', 'Iron Pillar', 'Jagannath Temple, Puri', 'Jageshwar', 'Jama Masjid', 'Jamali Kamali Tomb', 'Jantar Mantar, Jaipur', 'Jantar Mantar, New Delhi', 'Kedarnath Temple', 'Khajuraho Temple', 'Konark Sun Temple', 'Mahabodhi Temple', 'Meenakshi Temple', 'Nalanda mahavihara', 'Parliament House, New Delhi', 'Qutb Minar', 'Qutb Minar Complex', 'Ram Mandir', 'Rani ki Vav', 'Rashtrapati Bhavan', 'The Red Fort', 'Sanchi', 'Supreme Court of India', 'Swaminarayan Akshardham (Delhi)', 'Taj Hotels', 'The Lotus Temple', 'The Mysore Palace', 'The Statue of Unity', 'The Taj Mahal', 'Vaishno Devi Temple', 'Venkateswara Temple, Tirumala', 'Victoria Memorial, Kolkata', 'Vivekananda Rock Memorial']
+
+ # Google Drive links for the trained classifier weights:
+ # v3 - https://drive.google.com/file/d/1psuUF80mDO3EZkx2Rl9F5EkqbVHSxFqI/view?usp=sharing
+ # v4 - https://drive.google.com/file/d/10weM6bZ9R8xVl0KxKig9WRzss4OpIa7f/view?usp=sharing
+ # v5 - https://drive.google.com/file/d/1PXixJsrUaVcHEEC-jDlv4tHT2qrCrf5c/view?usp=sharing
+ file_id = '1PXixJsrUaVcHEEC-jDlv4tHT2qrCrf5c'  # Replace with your file ID
+ url = f'https://drive.google.com/uc?id={file_id}'
+
+ # Output path for the downloaded model
+ output = 'LandmarkClassifierV5.h5'  # Replace with your model file name
+
+ # Download the weights only if they are not already present
+ if not os.path.exists(output):
+     gdown.download(url, output, quiet=False)
+
+
+ def load_model():
+     """Load the trained landmark classifier from disk."""
+     return tf.keras.models.load_model(output)
+
+
+ model = load_model()
+
+ # Print the model summary to verify the weights loaded correctly
+ model.summary()
+
+
+ def identify_landmark(img):
+     """Return the predicted landmark label and its probability for a PIL image."""
+     # Preprocess the image to match the model's 224x224 input
+     img = img.resize((224, 224))
+     img_array = image.img_to_array(img)
+     img_array = np.expand_dims(img_array, axis=0)
+     img_array /= 255.0
+
+     # Get predictions
+     predictions = model.predict(img_array)
+
+     # Index of the class with the highest probability
+     predicted_class_index = np.argmax(predictions[0])
+
+     # Probability of the predicted class
+     predicted_probability = predictions[0][predicted_class_index]
+
+     # Map the predicted class index to its label
+     predicted_class_label = class_labels[predicted_class_index]
+
+     return predicted_class_label, predicted_probability
+
+
+ # Legacy variants kept for reference; they additionally require `import wikipedia`.
+
+ # def identify_landmark(img):
+ #     img1 = img.copy()
+ #
+ #     # Preprocess the image
+ #     img = img.resize((224, 224))
+ #     img_array = image.img_to_array(img)
+ #     img_array = np.expand_dims(img_array, axis=0)
+ #     img_array /= 255.0
+ #
+ #     # Get predictions
+ #     predictions = model.predict(img_array)
+ #
+ #     # Get the index of the class with the highest probability
+ #     predicted_class_index = np.argmax(predictions[0])
+ #
+ #     # Map the predicted class index to the class label
+ #     return class_labels[predicted_class_index]
+
+
+ # def generate_landmark_path(img_path):
+ #     img = image.load_img(img_path, target_size=(224, 224))
+ #     img1 = img.copy()
+ #
+ #     # Preprocess the image
+ #     img = img.resize((224, 224))
+ #     img_array = image.img_to_array(img)
+ #     img_array = np.expand_dims(img_array, axis=0)
+ #     img_array /= 255.0
+ #
+ #     # Get predictions
+ #     predictions = model.predict(img_array)
+ #
+ #     # Get the index of the class with the highest probability
+ #     predicted_class_index = np.argmax(predictions[0])
+ #
+ #     # Show the image with the predicted label as its title
+ #     plt.imshow(img1)
+ #     plt.axis("off")
+ #     plt.title(class_labels[predicted_class_index])
+ #     plt.show()
+ #     return wikipedia.summary(class_labels[predicted_class_index])
+
+
+ # def generate_landmark_url(img_url):
+ #     try:
+ #         # Download the image from the URL
+ #         response = requests.get(img_url)
+ #         response.raise_for_status()  # Check if the request was successful
+ #
+ #         # Open the image
+ #         img = Image.open(BytesIO(response.content))
+ #         img1 = img.copy()
+ #
+ #         # Preprocess the image
+ #         img = img.resize((224, 224))
+ #         img_array = image.img_to_array(img)
+ #         img_array = np.expand_dims(img_array, axis=0)
+ #         img_array /= 255.0
+ #
+ #         # Get predictions
+ #         predictions = model.predict(img_array)
+ #
+ #         # Get the index of the class with the highest probability
+ #         predicted_class_index = np.argmax(predictions[0])
+ #
+ #         # Show the image with the predicted label as its title
+ #         plt.imshow(img1)
+ #         plt.axis("off")
+ #         plt.title(class_labels[predicted_class_index])
+ #         plt.show()
+ #
+ #         return wikipedia.summary(class_labels[predicted_class_index])
+ #
+ #     except requests.exceptions.RequestException as e:
+ #         print(f"Error downloading the image: {e}")
+ #         return "Invalid image URL."
+ #     except IOError as e:
+ #         print(f"Error opening the image: {e}")
+ #         return "Invalid image file."
+ #     except Exception as e:
+ #         print(f"An error occurred: {e}")
+ #         return "An error occurred while processing the image."
LICENSE.txt ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Harshal
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
__pycache__/bharatcaptioner_demo.cpython-312.pyc ADDED
Binary file (4.77 kB).
 
app.py ADDED
@@ -0,0 +1,184 @@
+ import os
+ import json
+ import streamlit as st
+ from PIL import Image, UnidentifiedImageError, ExifTags
+ import requests
+ from io import BytesIO
+ import wikipedia
+ from easygoogletranslate import EasyGoogleTranslate
+ from BharatCaptioner import identify_landmark
+ from groq import Groq
+ import hashlib
+
+ # Initialize EasyGoogleTranslate
+ translator = EasyGoogleTranslate(source_language="en", target_language="hi", timeout=10)
+
+ # Load the Groq API key from config.json
+ working_dir = os.path.dirname(os.path.abspath(__file__))
+ with open(os.path.join(working_dir, "config.json")) as config_file:
+     config_data = json.load(config_file)
+ GROQ_API_KEY = config_data["GROQ_API_KEY"]
+ os.environ["GROQ_API_KEY"] = GROQ_API_KEY
+
+ client = Groq()
+
+ # Title of the Streamlit app
+ st.title("BharatCaptioner with Conversational Chatbot")
+ st.write(
+     "A tool to identify and describe Indian landmarks in Indic languages, and to chat about the image."
+ )
+
+ # Sidebar details
+ st.sidebar.title("Developed by Harshal and Harsh Pandey")
+ st.sidebar.write(
+     "**For the model that I trained**: [Mail me here](mailto:[email protected])"
+ )
+ st.sidebar.write(
+     "**For the code**: [GitHub Repo](https://github.com/justharshal2023/BharatCaptioner)"
+ )
+ st.sidebar.write(
+     "**Connect with me**: [LinkedIn](https://www.linkedin.com/in/harshal-123a90250/)"
+ )
+
+ # Image upload or URL input
+ uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+ url = st.text_input("Or enter a valid image URL...")
+
+ image = None
+ error_message = None
+ landmark = None
+ summary = None
+ caption = None
+
+
+ # Correct the image orientation using its EXIF metadata
+ def correct_image_orientation(img):
+     try:
+         for orientation in ExifTags.TAGS.keys():
+             if ExifTags.TAGS[orientation] == "Orientation":
+                 break
+         exif = img._getexif()
+         if exif is not None:
+             orientation = exif[orientation]
+             if orientation == 3:
+                 img = img.rotate(180, expand=True)
+             elif orientation == 6:
+                 img = img.rotate(270, expand=True)
+             elif orientation == 8:
+                 img = img.rotate(90, expand=True)
+     except (AttributeError, KeyError, IndexError):
+         pass
+     return img
+
+
+ # Hash the image bytes so a re-uploaded image can be detected
+ def get_image_hash(image):
+     img_bytes = image.tobytes()
+     return hashlib.md5(img_bytes).hexdigest()
+
+
+ # Reset the chat history whenever a new image or URL is provided
+ def reset_chat_if_new_image():
+     if "last_uploaded_hash" not in st.session_state:
+         st.session_state["last_uploaded_hash"] = None
+
+     # Process the new image or URL
+     if uploaded_file:
+         image = Image.open(uploaded_file)
+         image = correct_image_orientation(image)
+         new_image_hash = get_image_hash(image)
+     elif url:
+         try:
+             response = requests.get(url)
+             response.raise_for_status()
+             image = Image.open(BytesIO(response.content))
+             image = correct_image_orientation(image)
+             new_image_hash = get_image_hash(image)
+         except (requests.exceptions.RequestException, UnidentifiedImageError):
+             image = None
+             new_image_hash = None
+             error_message = (
+                 "Error: The provided URL is invalid or the image could not be loaded."
+             )
+             st.error(error_message)
+     else:
+         image = None
+         new_image_hash = None
+
+     # If the image is new, reset the chat and session state
+     if new_image_hash and new_image_hash != st.session_state["last_uploaded_hash"]:
+         st.session_state.clear()
+         st.session_state["last_uploaded_hash"] = new_image_hash
+         st.rerun()  # st.rerun replaces the deprecated st.experimental_rerun
+
+     return image
+
+
+ # Check for a new image or URL before rendering the rest of the page
+ image = reset_chat_if_new_image()
+
+ # If an image is provided
+ if image is not None:
+     # Resize image for processing
+     image = image.resize((256, 256))
+
+     # Identify the landmark using BharatCaptioner
+     landmark, prob = identify_landmark(image)
+     summary = wikipedia.summary(landmark, sentences=3)  # Shortened summary
+     st.write(f"**Landmark Identified:** {landmark} (Confidence: {prob:.2f})")
+
+     # Display image and landmark name in the sidebar
+     with st.sidebar:
+         st.image(image, caption="Current Image", use_column_width=True)
+         st.write(f"**Landmark:** {landmark}")
+
+     # Chatbot functionality
+     st.write("### Chat with the Chatbot about the Image")
+     caption = f"The landmark in the image is {landmark}. {summary}"
+
+     # Initialize chat history in session state if not present
+     if "chat_history" not in st.session_state:
+         st.session_state["chat_history"] = []
+
+     # Chatbot introduction message with bold text for landmark and question
+     if not st.session_state.get("chatbot_started"):
+         chatbot_intro = f"Hello! I see the image is of **{landmark}**. {summary} **Would you like to know more** about this landmark?"
+         st.session_state["chat_history"].append(
+             {"role": "assistant", "content": chatbot_intro}
+         )
+         st.session_state["chatbot_started"] = True
+
+     # Display chat history
+     for message in st.session_state.chat_history:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # User input
+     user_prompt = st.chat_input("Ask the Chatbot about the image...")
+
+     if user_prompt:
+         st.chat_message("user").markdown(user_prompt)
+         st.session_state.chat_history.append({"role": "user", "content": user_prompt})
+
+         # Send the user's message to the LLaMA chatbot, with the caption as context
+         messages = [
+             {
+                 "role": "system",
+                 "content": "You are a helpful image conversational assistant. "
+                 + f"The caption of the image is: {caption}",
+             },
+             *st.session_state.chat_history,
+         ]
+
+         response = client.chat.completions.create(
+             model="llama-3.1-8b-instant", messages=messages
+         )
+
+         assistant_response = response.choices[0].message.content
+         st.session_state.chat_history.append(
+             {"role": "assistant", "content": assistant_response}
+         )
+
+         # Display chatbot response
+         with st.chat_message("assistant"):
+             st.markdown(assistant_response)
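Note: app.py reads the Groq API key from a config.json next to the script, and that file is not part of this commit. A minimal sketch of creating it; the key value is a placeholder:

# Writes the config.json that app.py reads at startup.
import json

with open("config.json", "w") as f:
    json.dump({"GROQ_API_KEY": "<your-groq-api-key>"}, f)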
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ streamlit>=1.27  # st.chat_message/st.chat_input require >=1.24; st.rerun requires >=1.27
+ easygoogletranslate
+ wikipedia
+ pillow
+ requests
+ matplotlib
+ tensorflow
+ gdown
+ transformers
+ torch
+ tf-keras
+ groq
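To try the app locally (assuming the config.json sketched above is in place): install the dependencies with pip install -r requirements.txt, then launch with streamlit run app.py. The first run also downloads LandmarkClassifierV5.h5 from Google Drive before the UI responds.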