Vela committed on
Commit
01eeb3f
·
2 Parent(s): 0930d33 2d4160c

Enhanced streamlit and uploading in huggingface

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md DELETED
@@ -1,2 +0,0 @@
1
- # lookalike-image-finder
2
- Look-A-Like Image Finder lets you search for similar images by providing either a text description or an image.
 
 
 
src/app/__pycache__/homepage.cpython-313.pyc CHANGED
Binary files a/src/app/__pycache__/homepage.cpython-313.pyc and b/src/app/__pycache__/homepage.cpython-313.pyc differ
 
src/app/app.py CHANGED
@@ -1,16 +1,6 @@
1
  import homepage
2
-
3
- search_option = ['Select an option','Search by text', 'Search by image']
4
 
5
  homepage.setup_page()
6
-
7
- choosen_option = homepage.get_user_selection(search_option)
8
- if choosen_option.lower() == 'search by text':
9
- user_query = homepage.get_search_text_input()
10
- if user_query:
11
- homepage.get_images_by_text(user_query)
12
- elif choosen_option.lower() == 'search by image':
13
- image_input = homepage.get_search_image_input()
14
- if image_input:
15
- homepage.get_images_by_image(image_input)
16
-
 
"""Streamlit entry point: renders the search page and links to the data-loading page."""
import homepage
import streamlit as st

homepage.setup_page()
homepage.search_tab()
# Route through Streamlit's multipage navigation instead of an absolute URL:
# a hard-coded http://localhost:8501 address only resolves on the developer's
# machine and breaks as soon as the app is served from any other host.
st.page_link("pages/load_data_page.py", label="Navigate to load data page")
 
 
 
 
 
 
 
 
 
src/app/homepage.py CHANGED
@@ -4,32 +4,88 @@ src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..",
4
  sys.path.append(src_directory)
5
  import streamlit as st
6
  from utils import logger
7
- from database_pinecone import querry_database
8
  from model.clip_model import ClipModel
 
9
 
10
  clip_model = ClipModel()
11
  logger = logger.get_logger()
12
 
13
- PAGE_TITLE = "Look A Like - Image Finder"
14
- PAGE_LAYOUT = "centered"
15
  SIDEBAR_TITLE = "Find Similar Images"
 
 
 
16
 
17
  def setup_page():
18
- if 'is_page_configured' not in st.session_state:
19
- st.set_page_config(page_title=PAGE_TITLE, layout=PAGE_LAYOUT)
20
- st.title(PAGE_TITLE)
21
- st.sidebar.title(SIDEBAR_TITLE)
22
- logger.info(f"Page configured with title '{PAGE_TITLE}', layout '{PAGE_LAYOUT}', and sidebar title '{SIDEBAR_TITLE}'")
23
- st.session_state.is_page_configured = True
24
- else:
25
- logger.info("Page configuration already completed. Skipping setup.")
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def get_user_selection(options):
28
  selected_option = st.sidebar.selectbox("Select the option", options)
29
  return selected_option
30
 
31
  def get_search_image_input():
32
- uploaded_image = st.sidebar.file_uploader("Upload the image to get similar images", type=['png', 'jpeg'])
33
  return uploaded_image
34
 
35
  def get_search_text_input():
@@ -37,11 +93,13 @@ def get_search_text_input():
37
  return user_search
38
 
39
  def display_images(response):
 
40
  if response:
41
  cols = st.columns(2)
42
  for i, result in enumerate(response.matches):
43
  with cols[i % 2]:
44
- st.image(result.metadata["url"])
 
45
 
46
  def write_message(message):
47
  st.write(message)
@@ -49,13 +107,71 @@ def write_message(message):
49
  def get_images_by_text(query):
50
  embedding = clip_model.get_text_embedding(query)
51
  response = querry_database.fetch_data(embedding)
52
- message = f"Showing search results for {query}"
53
  write_message(message)
54
  images = display_images(response)
55
 
56
  def get_images_by_image(query):
57
  embedding = clip_model.get_uploaded_image_embedding(query)
58
  response = querry_database.fetch_data(embedding)
59
- message = f"Showing search results of relevant images"
60
  write_message(message)
61
  images = display_images(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  sys.path.append(src_directory)
5
  import streamlit as st
6
  from utils import logger
7
+ from database_pinecone import querry_database,create_database
8
  from model.clip_model import ClipModel
9
+ from data import data_set
10
 
11
  clip_model = ClipModel()
12
  logger = logger.get_logger()
13
 
14
+ PAGE_TITLE = "Look-a-Like: Image Finder"
15
+ PAGE_LAYOUT = "wide"
16
  SIDEBAR_TITLE = "Find Similar Images"
17
+ PHOTO_ID_KEY = "photo_id"
18
+ IMAGE_URL_KEY = "photo_image_url"
19
+ PINECONE_INDEX = create_database.get_index()
20
 
def setup_page():
    """Configure the Streamlit page and draw the title banner.

    Applies the page title and layout, renders the heading as raw HTML,
    shows a welcome toast and logs that the page was configured.
    """
    st.set_page_config(layout=PAGE_LAYOUT, page_title=PAGE_TITLE)

    # The heading is styled inline, hence the raw-HTML markdown call.
    banner_html = f"""
    <h1 style='color:darkblue; text-align:center; font-size:32px; margin-top:-10px;'>
        <i>{PAGE_TITLE} 🔍📸</i>
    </h1>
    """
    st.markdown(banner_html, unsafe_allow_html=True)

    st.toast("✨ Welcome to Look-a-Like: The Ultimate Image Finder! Start searching now. 🔍")
    logger.info(f"Page successfully configured with title: {PAGE_TITLE}")
30
 
def search_tab():
    """Render the search controls and the results for the active input.

    A text box and an image uploader sit side by side. A newly typed query
    is stored in ``st.session_state.search_query`` and clears any stored
    upload; a newly uploaded image is stored in
    ``st.session_state.uploaded_image`` and clears the stored query.
    Whichever input is stored is then searched, rendered and reset.
    """
    state = st.session_state

    # Horizontal rule below the page title.
    st.markdown("<hr>", unsafe_allow_html=True)

    # Seed the session slots on the first run of the script.
    if "search_query" not in state:
        state.search_query = ""
    if "uploaded_image" not in state:
        state.uploaded_image = None

    with st.container():
        text_col, upload_col = st.columns([7, 4], gap="small")

        with text_col:
            typed_query = st.text_input(
                label="🔍 Search for Images",
                placeholder="Type keywords (e.g., 'sunset beach', 'city skyline')",
                value=state.search_query,
            )

            # Only a non-blank query that differs from the stored one counts as new.
            if typed_query.strip() and typed_query != state.search_query:
                state.search_query = typed_query.strip()
                state.uploaded_image = None

        with upload_col:
            new_upload = st.file_uploader(
                label="📤 Upload an Image",
                type=["png", "jpg", "jpeg"],
                help="Upload an image to find visually similar results.",
            )

            if new_upload is not None and new_upload != state.uploaded_image:
                state.uploaded_image = new_upload
                state.search_query = ""

    with st.container():
        if state.search_query:
            get_images_by_text(state.search_query)
            state.search_query = ""

        if state.uploaded_image:
            st.image(state.uploaded_image, caption="Uploaded Image", use_container_width=True)
            get_images_by_image(state.uploaded_image)
            state.uploaded_image = None
82
+
def get_user_selection(options):
    """Show a sidebar select box over ``options`` and return the chosen entry."""
    return st.sidebar.selectbox("Select the option", options)
86
 
def get_search_image_input():
    """Show a PNG/JPEG file uploader and return whatever the widget yields."""
    return st.file_uploader("Upload the image to get similar images", type=['png', 'jpeg'])
90
 
91
  def get_search_text_input():
 
93
  return user_search
94
 
def display_images(response):
    """Render the images of a query response in a two-column grid.

    Args:
        response: Query result exposing a ``matches`` iterable; each match
            carries a ``metadata`` mapping with a ``"url"`` entry. A falsy
            response, or one without matches, renders nothing.
    """
    logger.info("Loading the images to display")

    # Guard both a missing response and an empty/absent match list so that
    # "displayed successfully" is only logged when something was rendered.
    matches = getattr(response, "matches", None) if response else None
    if not matches:
        logger.info("No images to display")
        return

    cols = st.columns(2)
    for i, result in enumerate(matches):
        # Alternate between the left and right column.
        with cols[i % 2]:
            st.image(result.metadata["url"], width=500)
    logger.info("Displayed the images successfully")
103
 
def write_message(message):
    """Write ``message`` to the page through ``st.write``."""
    st.write(message)
 
def get_images_by_text(query):
    """Run a text search: embed ``query``, fetch matches, announce and render them."""
    text_vector = clip_model.get_text_embedding(query)
    matches = querry_database.fetch_data(text_vector)
    write_message(f"🔍 Showing search results for {query}")
    display_images(matches)
113
 
def get_images_by_image(query):
    """Run an image search: embed the uploaded image, fetch matches, announce and render them."""
    image_vector = clip_model.get_uploaded_image_embedding(query)
    matches = querry_database.fetch_data(image_vector)
    write_message("🔍 Showing search results of relevant images")
    display_images(matches)
120
+
def load_data():
    """Render sidebar controls that upsert a slice of the dataset into the index.

    The user picks a start and end index in the sidebar and presses the
    upsert button. The selected rows are fetched with ``data_set.get_df``
    and passed one by one to ``create_database.process_and_upsert_data``,
    with a progress bar and a per-row status message. On completion the
    script is rerun; on an empty slice or an error a message is shown and
    the function returns.
    """
    st.sidebar.header("📊 Data Loading Parameters")
    start_index = st.sidebar.number_input("Select start index", min_value=0, value=0)
    end_index = st.sidebar.number_input("Select end index", min_value=0, value=100)

    if start_index > end_index:
        st.sidebar.error("⚠️ Start index must be earlier than the end index.")
        return

    if "load_clicked" not in st.session_state:
        st.session_state.load_clicked = False

    st.sidebar.info(f"Click the button to load data from index **{start_index} to {end_index}**.")
    clicked = st.sidebar.button(
        "🚀 Upsert Data",
        disabled=st.session_state.load_clicked,
        help="Click to insert data into the database",
    )
    if not clicked:
        return

    st.session_state.load_clicked = True
    try:
        with st.spinner("⏳ Upserting data... Please wait"):
            df = data_set.get_df(start_index, end_index)
            if df.empty:
                st.warning("⚠️ No data found in the selected range.")
                return

            success_message = st.empty()
            progress_bar = st.progress(0)
            total = len(df)
            for done, (i, data) in enumerate(df.iterrows(), start=1):
                create_database.process_and_upsert_data(PINECONE_INDEX, data, IMAGE_URL_KEY, PHOTO_ID_KEY)
                photo_id = data.get(PHOTO_ID_KEY, 'unknown')
                success_message.success(f"Row {i + 1} (ID: {photo_id}) added successfully!")
                logger.info(f"Row {i + 1} (ID: {photo_id}) upserted successfully.")
                progress_bar.progress(int(done / total * 100))
            progress_bar.empty()
            success_message.success("All data loaded and added to the database successfully!")
    except Exception as e:
        st.error(f"Error loading data: {e}")
        logger.error(f"Error loading data: {e}")
        return
    finally:
        # Re-enable the button on every exit path. Without this, the
        # empty-DataFrame return above would leave it disabled for the
        # rest of the session.
        st.session_state.load_clicked = False

    # Kept outside the try block: rerunning is control flow, not an error.
    st.rerun()
163
+
164
+
def about_us():
    """Show an "About us" button and, when it is pressed, write the app overview."""
    if not st.button("About us"):
        return

    overview = """
    This app allows you to search for images in two powerful ways:

    1. **Text-based Query**: You can simply type a description or keyword, and we will fetch the most relevant images from our database.

    2. **Image-based Query**: Alternatively, you can upload an image, and we'll search for similar images based on your input image.

    Whether you're looking for images based on a specific text query or searching using an image, our app makes it easy to find exactly what you're looking for. Simply enter your query and get results instantly!

    Explore and discover the images you need. Enjoy the search experience! 😊
    """
    st.write(overview)
src/app/pages/__pycache__/load_data_page.cpython-313.pyc ADDED
Binary file (3.42 kB). View file
 
src/app/pages/load_data_page.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
"""Streamlit page that configures the layout and shows the data-loading controls."""
from homepage import load_data, setup_page

setup_page()
load_data()
5
+
src/database_pinecone/__pycache__/create_database.cpython-313.pyc CHANGED
Binary files a/src/database_pinecone/__pycache__/create_database.cpython-313.pyc and b/src/database_pinecone/__pycache__/create_database.cpython-313.pyc differ
 
src/database_pinecone/__pycache__/querry_database.cpython-313.pyc CHANGED
Binary files a/src/database_pinecone/__pycache__/querry_database.cpython-313.pyc and b/src/database_pinecone/__pycache__/querry_database.cpython-313.pyc differ
 
src/database_pinecone/create_database.py CHANGED
@@ -5,10 +5,9 @@ sys.path.append(src_directory)
5
  from pinecone import Pinecone, ServerlessSpec
6
  import time
7
  from model.clip_model import ClipModel
8
- from data import request_images
9
- from data import data_set
10
  from config import config
11
  from utils import logger
 
12
 
13
  config = config.load_config()
14
  logger = logger.get_logger()
@@ -57,38 +56,44 @@ def get_index():
57
  logger.info(f"Error occurred while getting or creating the Pinecone index: {str(e)}", exc_info=True)
58
  return index
59
 
60
- def upsert_data(index,embeddings,id,url):
61
- try :
62
- logger.info("Started to upsert the data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  index.upsert(
64
  vectors=[{
65
- "id": id,
66
  "values": embeddings,
67
  "metadata": {
68
- "url": url,
69
- "photo_id": id
70
  }
71
  }],
72
  namespace="image-search-dataset",
73
  )
74
- logger.info(f"Successfully upserted the data in database")
 
 
75
  except Exception as e:
76
- logger.info(f"Unable to upsert the data {e}")
77
- raise
78
-
79
- def add_data_to_database(df):
80
- try:
81
- index = get_index()
82
- logger.info("Starting to add the embeddings to the database")
83
- for _, data in df.iterrows():
84
- url = data['photo_image_url']
85
- id = data['photo_id']
86
- embeddings = clip_model.get_image_embedding(url)
87
- upsert_data(index,embeddings,id,url)
88
- logger.info("Added embeddings to the database successfully")
89
- except Exception as e:
90
- logger.info("Unable to add the data. Error : {e}")
91
-
92
-
93
- # df = data_set.get_df(8000,8500)
94
- # add_data_to_database(df)
 
5
  from pinecone import Pinecone, ServerlessSpec
6
  import time
7
  from model.clip_model import ClipModel
 
 
8
  from config import config
9
  from utils import logger
10
+ import pandas as pd
11
 
12
  config = config.load_config()
13
  logger = logger.get_logger()
 
56
  logger.info(f"Error occurred while getting or creating the Pinecone index: {str(e)}", exc_info=True)
57
  return index
58
 
def process_and_upsert_data(index, data: pd.Series, url_key: str, id_key: str):
    """Embed the image referenced by one dataset row and upsert it into the index.

    The URL and photo ID are read from ``data``, an embedding for the image
    at that URL is produced by ``clip_model``, and a single vector (with the
    URL and photo ID as metadata) is upserted into the
    ``image-search-dataset`` namespace.

    Args:
        index: Index handle whose ``upsert`` method receives the vector.
        data (pandas.Series): A single row of data containing the URL and ID.
        url_key (str): The key in the Series that holds the image URL.
        id_key (str): The key in the Series that holds the photo ID.

    Returns:
        bool: ``True`` when the vector was upserted; ``False`` when embedding
        or upserting failed (the failure is logged, not raised).

    Raises:
        ValueError: If ``url_key`` or ``id_key`` is missing from ``data``.
    """
    # Validate up front so a malformed row is reported to the caller
    # instead of being logged and forgotten.
    if url_key not in data or id_key not in data:
        raise ValueError(f"Missing required keys: '{url_key}' or '{id_key}' in the data")

    url = data[url_key]
    photo_id = data[id_key]

    try:
        logger.info("Started to process and upsert the data")
        embeddings = clip_model.get_image_embedding(url)
        index.upsert(
            vectors=[{
                "id": photo_id,
                "values": embeddings,
                "metadata": {
                    "url": url,
                    "photo_id": photo_id,
                },
            }],
            namespace="image-search-dataset",
        )
    except Exception as e:
        # Best-effort per row: keep the batch going, but record the
        # traceback and tell the caller this row did not make it.
        logger.error(f"Error processing row with photo_id {photo_id}: {e}", exc_info=True)
        return False

    logger.info(f"Successfully upserted data for photo_id {photo_id} with URL {url}")
    return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/model/__pycache__/clip_model.cpython-313.pyc CHANGED
Binary files a/src/model/__pycache__/clip_model.cpython-313.pyc and b/src/model/__pycache__/clip_model.cpython-313.pyc differ