Enhanced the Streamlit app and uploaded it to Hugging Face
Browse files
- .gitattributes +35 -0
- README.md +0 -2
- src/app/__pycache__/homepage.cpython-313.pyc +0 -0
- src/app/app.py +3 -13
- src/app/homepage.py +131 -15
- src/app/pages/__pycache__/load_data_page.cpython-313.pyc +0 -0
- src/app/pages/load_data_page.py +5 -0
- src/database_pinecone/__pycache__/create_database.cpython-313.pyc +0 -0
- src/database_pinecone/__pycache__/querry_database.cpython-313.pyc +0 -0
- src/database_pinecone/create_database.py +33 -28
- src/model/__pycache__/clip_model.cpython-313.pyc +0 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
# lookalike-image-finder
|
2 |
-
"Look-A-Like Image Finder lets you search for similar images by providing either a text description or an image.
|
|
|
|
|
|
src/app/__pycache__/homepage.cpython-313.pyc
CHANGED
Binary files a/src/app/__pycache__/homepage.cpython-313.pyc and b/src/app/__pycache__/homepage.cpython-313.pyc differ
|
|
src/app/app.py
CHANGED
@@ -1,16 +1,6 @@
|
|
1 |
import homepage
|
2 |
-
|
3 |
-
search_option = ['Select an option','Search by text', 'Search by image']
|
4 |
|
5 |
homepage.setup_page()
|
6 |
-
|
7 |
-
|
8 |
-
if choosen_option.lower() == 'search by text':
|
9 |
-
user_query = homepage.get_search_text_input()
|
10 |
-
if user_query:
|
11 |
-
homepage.get_images_by_text(user_query)
|
12 |
-
elif choosen_option.lower() == 'search by image':
|
13 |
-
image_input = homepage.get_search_image_input()
|
14 |
-
if image_input:
|
15 |
-
homepage.get_images_by_image(image_input)
|
16 |
-
|
|
|
1 |
import homepage
|
2 |
+
import streamlit as st
|
|
|
3 |
|
4 |
homepage.setup_page()
|
5 |
+
homepage.search_tab()
|
6 |
+
st.link_button("Navigate to load data page",url="http://localhost:8501/load_data_page")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/app/homepage.py
CHANGED
@@ -4,32 +4,88 @@ src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..",
|
|
4 |
sys.path.append(src_directory)
|
5 |
import streamlit as st
|
6 |
from utils import logger
|
7 |
-
from database_pinecone import querry_database
|
8 |
from model.clip_model import ClipModel
|
|
|
9 |
|
10 |
clip_model = ClipModel()
|
11 |
logger = logger.get_logger()
|
12 |
|
13 |
-
PAGE_TITLE = "Look
|
14 |
-
PAGE_LAYOUT = "
|
15 |
SIDEBAR_TITLE = "Find Similar Images"
|
|
|
|
|
|
|
16 |
|
17 |
def setup_page():
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
def get_user_selection(options):
|
28 |
selected_option = st.sidebar.selectbox("Select the option", options)
|
29 |
return selected_option
|
30 |
|
31 |
def get_search_image_input():
|
32 |
-
uploaded_image = st.
|
33 |
return uploaded_image
|
34 |
|
35 |
def get_search_text_input():
|
@@ -37,11 +93,13 @@ def get_search_text_input():
|
|
37 |
return user_search
|
38 |
|
39 |
def display_images(response):
|
|
|
40 |
if response:
|
41 |
cols = st.columns(2)
|
42 |
for i, result in enumerate(response.matches):
|
43 |
with cols[i % 2]:
|
44 |
-
st.image(result.metadata["url"])
|
|
|
45 |
|
46 |
def write_message(message):
|
47 |
st.write(message)
|
@@ -49,13 +107,71 @@ def write_message(message):
|
|
49 |
def get_images_by_text(query):
|
50 |
embedding = clip_model.get_text_embedding(query)
|
51 |
response = querry_database.fetch_data(embedding)
|
52 |
-
message = f"Showing search results for {query}"
|
53 |
write_message(message)
|
54 |
images = display_images(response)
|
55 |
|
56 |
def get_images_by_image(query):
|
57 |
embedding = clip_model.get_uploaded_image_embedding(query)
|
58 |
response = querry_database.fetch_data(embedding)
|
59 |
-
message = f"Showing search results of relevant images"
|
60 |
write_message(message)
|
61 |
images = display_images(response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
sys.path.append(src_directory)
|
5 |
import streamlit as st
|
6 |
from utils import logger
|
7 |
+
from database_pinecone import querry_database,create_database
|
8 |
from model.clip_model import ClipModel
|
9 |
+
from data import data_set
|
10 |
|
11 |
clip_model = ClipModel()
|
12 |
logger = logger.get_logger()
|
13 |
|
14 |
+
PAGE_TITLE = "Look-a-Like: Image Finder"
|
15 |
+
PAGE_LAYOUT = "wide"
|
16 |
SIDEBAR_TITLE = "Find Similar Images"
|
17 |
+
PHOTO_ID_KEY = "photo_id"
|
18 |
+
IMAGE_URL_KEY = "photo_image_url"
|
19 |
+
PINECONE_INDEX = create_database.get_index()
|
20 |
|
21 |
def setup_page():
|
22 |
+
st.set_page_config(page_title=PAGE_TITLE, layout=PAGE_LAYOUT)
|
23 |
+
st.markdown(f"""
|
24 |
+
<h1 style='color:darkblue; text-align:center; font-size:32px; margin-top:-10px;'>
|
25 |
+
<i>{PAGE_TITLE} 🔍📸</i>
|
26 |
+
</h1>
|
27 |
+
""", unsafe_allow_html=True)
|
28 |
+
st.toast("✨ Welcome to Look-a-Like: The Ultimate Image Finder! Start searching now. 🔍")
|
29 |
+
logger.info(f"Page successfully configured with title: {PAGE_TITLE}")
|
30 |
|
31 |
+
def search_tab():
|
32 |
+
|
33 |
+
st.markdown("<hr>", unsafe_allow_html=True) # To add a Horizontal line below title
|
34 |
+
|
35 |
+
if "search_query" not in st.session_state:
|
36 |
+
st.session_state.search_query = ""
|
37 |
+
if "uploaded_image" not in st.session_state:
|
38 |
+
st.session_state.uploaded_image = None
|
39 |
+
|
40 |
+
with st.container():
|
41 |
+
|
42 |
+
col1, col2 = st.columns([7, 4], gap="small")
|
43 |
+
with col1:
|
44 |
+
search_query = st.text_input(
|
45 |
+
label="🔍 Search for Images",
|
46 |
+
placeholder="Type keywords (e.g., 'sunset beach', 'city skyline')",
|
47 |
+
value=st.session_state.search_query
|
48 |
+
)
|
49 |
+
|
50 |
+
if search_query.strip() and search_query != st.session_state.search_query:
|
51 |
+
st.session_state.search_query = search_query.strip()
|
52 |
+
st.session_state.uploaded_image = None
|
53 |
+
|
54 |
+
with col2:
|
55 |
+
uploaded_image = st.file_uploader(
|
56 |
+
label="📤 Upload an Image",
|
57 |
+
type=["png", "jpg", "jpeg"],
|
58 |
+
help="Upload an image to find visually similar results."
|
59 |
+
)
|
60 |
+
|
61 |
+
if uploaded_image is not None and uploaded_image != st.session_state.uploaded_image:
|
62 |
+
st.session_state.uploaded_image = uploaded_image
|
63 |
+
st.session_state.search_query = ""
|
64 |
+
|
65 |
+
# with col3:
|
66 |
+
# st.markdown("<br>", unsafe_allow_html=True)
|
67 |
+
# if st.button(label="🗑️ Clear", help="Clear search input and uploaded image"):
|
68 |
+
# st.session_state.search_query = ""
|
69 |
+
# st.session_state.uploaded_image = None
|
70 |
+
# st.session_state.clear()
|
71 |
+
|
72 |
+
with st.container():
|
73 |
+
if st.session_state.search_query:
|
74 |
+
get_images_by_text(st.session_state.search_query)
|
75 |
+
st.session_state.search_query = ""
|
76 |
+
|
77 |
+
|
78 |
+
if st.session_state.uploaded_image:
|
79 |
+
st.image(st.session_state.uploaded_image, caption="Uploaded Image", use_container_width=True)
|
80 |
+
get_images_by_image(st.session_state.uploaded_image)
|
81 |
+
st.session_state.uploaded_image = None
|
82 |
+
|
83 |
def get_user_selection(options):
|
84 |
selected_option = st.sidebar.selectbox("Select the option", options)
|
85 |
return selected_option
|
86 |
|
87 |
def get_search_image_input():
|
88 |
+
uploaded_image = st.file_uploader("Upload the image to get similar images", type=['png', 'jpeg'])
|
89 |
return uploaded_image
|
90 |
|
91 |
def get_search_text_input():
|
|
|
93 |
return user_search
|
94 |
|
95 |
def display_images(response):
|
96 |
+
logger.info("Loading the images to dispay")
|
97 |
if response:
|
98 |
cols = st.columns(2)
|
99 |
for i, result in enumerate(response.matches):
|
100 |
with cols[i % 2]:
|
101 |
+
st.image(result.metadata["url"], width=500)
|
102 |
+
logger.info("Displayed the images successfully")
|
103 |
|
104 |
def write_message(message):
|
105 |
st.write(message)
|
|
|
107 |
def get_images_by_text(query):
|
108 |
embedding = clip_model.get_text_embedding(query)
|
109 |
response = querry_database.fetch_data(embedding)
|
110 |
+
message = f"🔍 Showing search results for {query}"
|
111 |
write_message(message)
|
112 |
images = display_images(response)
|
113 |
|
114 |
def get_images_by_image(query):
|
115 |
embedding = clip_model.get_uploaded_image_embedding(query)
|
116 |
response = querry_database.fetch_data(embedding)
|
117 |
+
message = f"🔍 Showing search results of relevant images"
|
118 |
write_message(message)
|
119 |
images = display_images(response)
|
120 |
+
|
121 |
+
def load_data():
|
122 |
+
st.sidebar.header("📊 Data Loading Parameters")
|
123 |
+
start_index = st.sidebar.number_input("Select start index", min_value=0, value=0)
|
124 |
+
end_index = st.sidebar.number_input("Select end index", min_value=0, value=100)
|
125 |
+
|
126 |
+
if start_index > end_index:
|
127 |
+
st.sidebar.error("⚠️ Start index must be earlier than the end index.")
|
128 |
+
return
|
129 |
+
|
130 |
+
if "load_clicked" not in st.session_state:
|
131 |
+
st.session_state.load_clicked = False
|
132 |
+
|
133 |
+
try:
|
134 |
+
st.sidebar.info(f"Click the button to load data from index **{start_index} to {end_index}**.")
|
135 |
+
if st.sidebar.button("🚀 Upsert Data", disabled=st.session_state.load_clicked, help="Click to insert data into the database"):
|
136 |
+
st.session_state.load_clicked = True
|
137 |
+
|
138 |
+
with st.spinner("⏳ Upserting data... Please wait"):
|
139 |
+
df = data_set.get_df(start_index, end_index)
|
140 |
+
if df.empty:
|
141 |
+
st.warning("⚠️ No data found in the selected range.")
|
142 |
+
return
|
143 |
+
success_message = st.empty()
|
144 |
+
progress_bar = st.progress(0)
|
145 |
+
start = 0
|
146 |
+
end = len(df)
|
147 |
+
for i, data in df.iterrows():
|
148 |
+
create_database.process_and_upsert_data(PINECONE_INDEX, data, IMAGE_URL_KEY, PHOTO_ID_KEY)
|
149 |
+
success_message.success(f"Row {i + 1} (ID: {data.get('photo_id', 'unknown')}) added successfully!")
|
150 |
+
logger.info(f"Row {i + 1} (ID: {data.get('photo_id', 'unknown')}) upserted successfully.")
|
151 |
+
start = start + 1
|
152 |
+
progress = int((start) / end * 100)
|
153 |
+
progress_bar.progress(progress)
|
154 |
+
progress_bar.empty()
|
155 |
+
success_message.success("All data loaded and added to the database successfully!")
|
156 |
+
st.session_state.load_clicked = False
|
157 |
+
st.rerun()
|
158 |
+
|
159 |
+
except Exception as e:
|
160 |
+
st.error(f"Error loading data: {e}")
|
161 |
+
logger.error(f"Error loading data: {e}")
|
162 |
+
st.session_state.load_clicked = False
|
163 |
+
|
164 |
+
|
165 |
+
def about_us():
|
166 |
+
if st.button("About us"):
|
167 |
+
st.write("""
|
168 |
+
This app allows you to search for images in two powerful ways:
|
169 |
+
|
170 |
+
1. **Text-based Query**: You can simply type a description or keyword, and we will fetch the most relevant images from our database.
|
171 |
+
|
172 |
+
2. **Image-based Query**: Alternatively, you can upload an image, and we'll search for similar images based on your input image.
|
173 |
+
|
174 |
+
Whether you're looking for images based on a specific text query or searching using an image, our app makes it easy to find exactly what you're looking for. Simply enter your query and get results instantly!
|
175 |
+
|
176 |
+
Explore and discover the images you need. Enjoy the search experience! 😊
|
177 |
+
""")
|
src/app/pages/__pycache__/load_data_page.cpython-313.pyc
ADDED
Binary file (3.42 kB). View file
|
|
src/app/pages/load_data_page.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import homepage
|
2 |
+
|
3 |
+
homepage.setup_page()
|
4 |
+
homepage.load_data()
|
5 |
+
|
src/database_pinecone/__pycache__/create_database.cpython-313.pyc
CHANGED
Binary files a/src/database_pinecone/__pycache__/create_database.cpython-313.pyc and b/src/database_pinecone/__pycache__/create_database.cpython-313.pyc differ
|
|
src/database_pinecone/__pycache__/querry_database.cpython-313.pyc
CHANGED
Binary files a/src/database_pinecone/__pycache__/querry_database.cpython-313.pyc and b/src/database_pinecone/__pycache__/querry_database.cpython-313.pyc differ
|
|
src/database_pinecone/create_database.py
CHANGED
@@ -5,10 +5,9 @@ sys.path.append(src_directory)
|
|
5 |
from pinecone import Pinecone, ServerlessSpec
|
6 |
import time
|
7 |
from model.clip_model import ClipModel
|
8 |
-
from data import request_images
|
9 |
-
from data import data_set
|
10 |
from config import config
|
11 |
from utils import logger
|
|
|
12 |
|
13 |
config = config.load_config()
|
14 |
logger = logger.get_logger()
|
@@ -57,38 +56,44 @@ def get_index():
|
|
57 |
logger.info(f"Error occurred while getting or creating the Pinecone index: {str(e)}", exc_info=True)
|
58 |
return index
|
59 |
|
60 |
-
def
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
index.upsert(
|
64 |
vectors=[{
|
65 |
-
"id":
|
66 |
"values": embeddings,
|
67 |
"metadata": {
|
68 |
-
|
69 |
-
|
70 |
}
|
71 |
}],
|
72 |
namespace="image-search-dataset",
|
73 |
)
|
74 |
-
logger.info(f"Successfully upserted
|
|
|
|
|
75 |
except Exception as e:
|
76 |
-
logger.
|
77 |
-
raise
|
78 |
-
|
79 |
-
def add_data_to_database(df):
|
80 |
-
try:
|
81 |
-
index = get_index()
|
82 |
-
logger.info("Starting to add the embeddings to the database")
|
83 |
-
for _, data in df.iterrows():
|
84 |
-
url = data['photo_image_url']
|
85 |
-
id = data['photo_id']
|
86 |
-
embeddings = clip_model.get_image_embedding(url)
|
87 |
-
upsert_data(index,embeddings,id,url)
|
88 |
-
logger.info("Added embeddings to the database successfully")
|
89 |
-
except Exception as e:
|
90 |
-
logger.info("Unable to add the data. Error : {e}")
|
91 |
-
|
92 |
-
|
93 |
-
# df = data_set.get_df(8000,8500)
|
94 |
-
# add_data_to_database(df)
|
|
|
5 |
from pinecone import Pinecone, ServerlessSpec
|
6 |
import time
|
7 |
from model.clip_model import ClipModel
|
|
|
|
|
8 |
from config import config
|
9 |
from utils import logger
|
10 |
+
import pandas as pd
|
11 |
|
12 |
config = config.load_config()
|
13 |
logger = logger.get_logger()
|
|
|
56 |
logger.info(f"Error occurred while getting or creating the Pinecone index: {str(e)}", exc_info=True)
|
57 |
return index
|
58 |
|
59 |
+
def process_and_upsert_data(index, data: pd.Series, url_key: str, id_key: str):
|
60 |
+
"""
|
61 |
+
Processes a single row of data (pandas Series) by extracting the URL and ID, generating image embeddings using
|
62 |
+
a clip model, and then upserting the generated embeddings into a pinecone database index.
|
63 |
+
|
64 |
+
This function handles:
|
65 |
+
- Extracting the URL and ID from the provided `data` (a pandas Series) using the specified keys (`url_key` and `id_key`).
|
66 |
+
- Using the `clip_model` to generate embeddings for the image found at the extracted URL.
|
67 |
+
- Upserting the generated embeddings, along with the photo ID and URL, into the pinecone database index using the `upsert` method.
|
68 |
+
|
69 |
+
Args:
|
70 |
+
data (pandas.Series): A single row of data from the DataFrame, containing the URL and ID.
|
71 |
+
url_key (str): The column name in the Series that contains the URL of the image.
|
72 |
+
id_key (str): The column name in the Series that contains the photo ID.
|
73 |
+
|
74 |
+
"""
|
75 |
+
# Validate if the required columns exist in the row (Series)
|
76 |
+
if url_key not in data or id_key not in data:
|
77 |
+
raise ValueError(f"Missing required keys: '{url_key}' or '{id_key}' in the data")
|
78 |
+
|
79 |
+
try:
|
80 |
+
logger.info("Started to process and upsert the data")
|
81 |
+
url = data[url_key]
|
82 |
+
photo_id = data[id_key]
|
83 |
+
embeddings = clip_model.get_image_embedding(url)
|
84 |
index.upsert(
|
85 |
vectors=[{
|
86 |
+
"id": photo_id,
|
87 |
"values": embeddings,
|
88 |
"metadata": {
|
89 |
+
"url": url,
|
90 |
+
"photo_id": photo_id
|
91 |
}
|
92 |
}],
|
93 |
namespace="image-search-dataset",
|
94 |
)
|
95 |
+
logger.info(f"Successfully upserted data for photo_id {photo_id} with URL {url}")
|
96 |
+
except ValueError as ve:
|
97 |
+
logger.error(f"ValueError: {ve}")
|
98 |
except Exception as e:
|
99 |
+
logger.error(f"Error processing row with photo_id {data.get(id_key, 'unknown')}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/model/__pycache__/clip_model.cpython-313.pyc
CHANGED
Binary files a/src/model/__pycache__/clip_model.cpython-313.pyc and b/src/model/__pycache__/clip_model.cpython-313.pyc differ
|
|