carlosh93 committed
Commit ed8368e · 1 Parent(s): 0bd89b7

Updating to a new version with Supabase authentication and VLM captioning

app.py CHANGED
@@ -1,6 +1,8 @@
  # import spacy.cli
  # spacy.cli.download("ja_core_news_sm")
  # spacy.cli.download("zh_core_web_sm")
+ import os
+ os.environ["TF_USE_LEGACY_KERAS"] = "1"
  import spacy_udpipe
  spacy_udpipe.download("ja")
  spacy_udpipe.download("zh")
@@ -16,7 +18,7 @@ metadata = load_metadata()
 
  demo = build_ui(concepts, metadata, HF_API_TOKEN, HF_DATASET_NAME)
  # demo.launch()
- demo.launch(debug=False)
+ demo.launch(debug=False, server_port=7861)
 
  demo.close()
  # gr.close_all()
config/settings.py CHANGED
@@ -3,6 +3,9 @@ import os
 
  load_dotenv()
 
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+ HF_API_TOKEN = os.getenv("HF_TOKEN")
  HF_DATASET_NAME = os.getenv("HF_DATASET_NAME")
- LOCAL_DS_DIRECTORY_PATH = os.getenv("LOCAL_DS_DIRECTORY_PATH")
+ LOCAL_DS_DIRECTORY_PATH = os.getenv("LOCAL_DS_DIRECTORY_PATH")
+ SUPABASE_URL: str = os.getenv("SUPABASE_URL")
+ SUPABASE_KEY: str = os.getenv("SUPABASE_KEY")
+ REDIRECT_TO_URL: str = os.getenv("REDIRECT_TO_URL")
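The settings module now reads the Supabase credentials and the password-reset redirect from the environment. A minimal sketch of how downstream code can consume them; the example entries named in the comments are placeholders, not values taken from the repo:

# Hypothetical .env entries loaded by load_dotenv() above (all values are placeholders):
#   HF_TOKEN=..., HF_DATASET_NAME=..., LOCAL_DS_DIRECTORY_PATH=...
#   SUPABASE_URL=https://<project>.supabase.co, SUPABASE_KEY=<anon key>, REDIRECT_TO_URL=<reset-password page>
from config.settings import SUPABASE_URL, SUPABASE_KEY

if not (SUPABASE_URL and SUPABASE_KEY):
    raise RuntimeError("Supabase credentials are missing from the environment")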
logic/data_utils.py CHANGED
@@ -8,7 +8,7 @@ import uuid
  import gradio as gr
  from PIL import Image
  import numpy as np
-
+ from logic.supabase_client import auth_handler
 
  def load_concepts(path="data/concepts.json"):
      with open(path, encoding='utf-8') as f:
@@ -53,6 +53,9 @@ class CustomHFDatasetSaver:
          self.local_ds_folder = local_ds_folder
          os.makedirs(self.local_ds_folder, exist_ok=True)
 
+         # Migrate any existing JSON files to include new VLM fields
+         self._migrate_existing()
+
          self.data_outputs = data_outputs  # list of components to read values from
 
          # create scheduler to commit the data to the hub every x minutes
@@ -63,6 +66,27 @@ class CustomHFDatasetSaver:
              every=1,
              token=self.api_token,
          )
+
+     def _migrate_existing(self):
+         """
+         Ensure all existing JSON sample files have the same schema
+         by adding missing keys for 'vlm_caption' and 'vlm_feedback'.
+         """
+         for root, _, files in os.walk(self.local_ds_folder):
+             for fname in files:
+                 if fname.endswith('.json'):
+                     fpath = os.path.join(root, fname)
+                     with open(fpath, 'r+', encoding='utf-8') as f:
+                         data = json.load(f)
+                         updated = False
+                         for key in ['vlm_caption', 'vlm_feedback']:
+                             if key not in data:
+                                 data[key] = ""
+                                 updated = True
+                         if updated:
+                             f.seek(0)
+                             json.dump(data, f, indent=2)
+                             f.truncate()
 
      def validate_data(self, values_dic):
          """
@@ -166,10 +190,16 @@ class CustomHFDatasetSaver:
          values_dic["id"] = f'{country}_{language}_{category}_{concept}_{current_timestamp}'
 
          #prepare the main directory of the sample
-         if values_dic.get("username"):
-             sample_dir = os.path.join("logged_in_users", values_dic["country"], values_dic["language"], values_dic["username"], str(current_timestamp))
+         # here we check if the user is logged in or not
+         user_info = auth_handler.is_logged_in(values_dic.get("client", None))
+         print(f"User info: {user_info}")
+         if user_info['success']:
+             # sample_dir = os.path.join("logged_in_users", values_dic["country"], values_dic["language"], values_dic["username"], str(current_timestamp))
+             sample_dir = os.path.join("logged_in_users", values_dic["country"], values_dic["language"], user_info['email'], str(current_timestamp))
+             print(f"Sample directory for logged in user: {sample_dir}")
          else:
              sample_dir = os.path.join("anonymous_users", values_dic["country"], values_dic["language"], str(uuid.uuid4()), str(current_timestamp))
+         print(f"Sample directory: {sample_dir}")
 
          os.makedirs(os.path.join(self.local_ds_folder, sample_dir), exist_ok=True)
 
@@ -217,6 +247,8 @@ class CustomHFDatasetSaver:
              # "image_file": image_file_path_on_hub,
              "image_url": values_dic['image_url'] or "",
              "caption": values_dic['caption'] or "",
+             "vlm_caption": values_dic['vlm_caption'] or "",
+             "vlm_feedback": values_dic['vlm_feedback'] or "",
              "country": values_dic['country'] or "",
              "language": values_dic['language'] or "",
              "category": values_dic['category'] or "",
@@ -227,7 +259,7 @@ class CustomHFDatasetSaver:
              "category_4_concepts": values_dic.get('category_4_concepts') or [""],
              "category_5_concepts": values_dic.get('category_5_concepts') or [""],
              "timestamp": current_timestamp,
-             "username": values_dic['username'] or "",
+             "username": user_info['email'] if user_info['success'] else "",
              "password": values_dic['password'] or "",
              "id": values_dic['id'],
              "excluded": False if values_dic.get('excluded') is None else bool(values_dic.get('excluded')),
logic/handlers.py CHANGED
@@ -4,6 +4,7 @@ import io
  import PIL
  import requests
  from typing import Literal
+ from logic.supabase_client import auth_handler
 
  from datasets import load_dataset, concatenate_datasets, Image
  from data.lang2eng_map import lang2eng_mapping
@@ -12,6 +13,7 @@ import gradio as gr
  import bcrypt
  from config.settings import HF_API_TOKEN
  from huggingface_hub import snapshot_download
+ from logic.vlm import vlm_manager
  # from .blur import blur_faces, detect_faces
  from retinaface import RetinaFace
  from gradio_modal import Modal
@@ -71,13 +73,13 @@ def clear_data(message: Literal["submit", "remove"] | None = None):
          gr.Info("If you logged in, you will soon see it at the bottom of the page, where you can edit it or delete it", title="Thank you for submitting your data! 🎉", duration=5)
      elif message == "remove":
          gr.Info("", title="Your data has been deleted! 🗑️", duration=5)
-     return (None, None, None, None, None, gr.update(value=None),
+     return (None, None, None, gr.update(value=None), gr.update(value=None, visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True), None, None, gr.update(value=None),
              gr.update(value=[]), gr.update(value=[]), gr.update(value=[]),
              gr.update(value=[]), gr.update(value=[]))
 
 
  def exit():
-     return (None, None, None, gr.Dataset(samples=[]), gr.Markdown("**Loading your data, please wait ...**"),
+     return (None, None, None, gr.update(value=None), gr.update(value=None, visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True), gr.Dataset(samples=[]), gr.Markdown("**Loading your data, please wait ...**"),
              gr.update(value=None), gr.update(value=None), [None, None, "", ""], gr.update(value=None),
              gr.update(value=None), gr.update(value=None),
              gr.update(value=None), gr.update(value=None), gr.update(value=None),
@@ -87,9 +89,8 @@ def exit():
  def validate_metadata(country, language):
      # Perform your validation logic here
      if country is None or language is None:
-         return gr.Button("Proceed", interactive=False)
-
-     return gr.Button("Proceed", interactive=True)
+         return gr.update(interactive=False)
+     return gr.update(interactive=True)
 
 
  def validate_inputs(image, ori_img, concept): # is_blurred
@@ -129,6 +130,30 @@ def validate_inputs(image, ori_img, concept): # is_blurred
 
      return gr.Button("Submit", variant="primary", interactive=True), result_image, ori_img # is_blurred
 
+ def generate_vlm_caption(image, model_name="SmolVLM-500M"): # processor, model
+     """
+     Generate a caption for the given image using a Vision-Language Model.
+     Uses the global VLMManager for efficient model loading and caching.
+     """
+     if image is None:
+         gr.Warning("⚠️ Please upload an image first.", duration=5)
+         return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True)
+
+     try:
+         # Use the global VLMManager to load/get the model
+         vlm_manager.load_model(model_name)
+         caption = vlm_manager.generate_caption(image)
+     except Exception as e:
+         print(f"Error generating caption: {e}. Cleaning up memory and try again.")
+         gr.Warning(f"⚠️ Error generating caption: {e} due to memory issues. Please try again.", duration=5)
+         # vlm_manager.cleanup_memory()
+         return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True)
+     finally: # For now, let's cleanup memory after each generation
+         vlm_manager.cleanup_memory()
+
+     # print(caption)
+
+     return caption, gr.update(visible=True), gr.update(visible=True), gr.update(interactive=False), gr.update(interactive=False)
 
  def count_words(caption, language):
      match language:
@@ -152,8 +177,14 @@ def add_prefix(example, column_name, prefix):
      example[column_name] = (f"{prefix}/" + example[column_name])
      return example
 
- def update_user_data(username, password, country, language_choice, HF_DATASET_NAME, local_ds_directory_path):
+ def update_user_data(client, country, language_choice, HF_DATASET_NAME, local_ds_directory_path):
+     user_info = auth_handler.is_logged_in(client)
+     print(f"User info: {user_info}")
+     if not user_info['success']:
+         print("User is not logged in or session expired.")
+         return gr.Dataset(samples=[]), None, None
 
+     username = user_info['email']
      datasets_list = []
      # Try loading local dataset
      try:
@@ -191,18 +222,19 @@
      # Handle all empty
      if not datasets_list:
          if username: # User is logged in but has no data
-             return gr.Dataset(samples=[]), gr.Markdown("<p style='color: red;'>No data available for this user. Please upload an image.</p>")
+             return gr.Dataset(samples=[]), gr.Markdown("<p style='color: red;'>No data available for this user. Please upload an image.</p>"), None
          else: # No user logged in
-             return gr.Dataset(samples=[]), gr.Markdown("")
+             return gr.Dataset(samples=[]), gr.Markdown(""), None
 
      dataset = concatenate_datasets(datasets_list)
      # TODO: we should link username with password and language and country, otherwise there will be an error when loading with different language and clicking on the example
-     if username and password:
-         user_dataset = dataset.filter(lambda x: x['username'] == username and is_password_correct(x['password'], password))
+     if username:
+         user_dataset = dataset.filter(lambda x: x['username'] == username)
          user_dataset = user_dataset.sort('timestamp', reverse=True)
          # Show only unique entries (most recent)
         user_ids = set()
         samples = []
+        vlm_captions = dict()
         for d in user_dataset:
             if d['id'] in user_ids:
                 continue
@@ -229,6 +261,10 @@
                 d['image_file'], d['image_url'], d['caption'] or "", d['country'],
                 d['language'], d['category'], d['concept'], additional_concepts_by_category, d['id']] # d['is_blurred']
             )
+
+            if 'vlm_caption' in d:
+                vlm_captions[d['id']] = d.get('vlm_caption', "")
+
         # return gr.Dataset(samples=samples), None
         # ───────────────────────────────────────────────────
         # Clean up the “Additional Concepts” column (index 7)
@@ -255,10 +291,14 @@
             row_copy[7] = ", ".join(vals)
             cleaned.append(row_copy)
 
-        return gr.Dataset(samples=cleaned), None
+        # check if vlm_captions is an empty dictionary
+        if not vlm_captions:
+            vlm_captions = None
+
+        return gr.Dataset(samples=cleaned), None, vlm_captions
     else:
         # TODO: should we show the entire dataset instead? What about "other data" tab?
-        return gr.Dataset(samples=[]), None
+        return gr.Dataset(samples=[]), None, None
 
 
  def update_language(local_storage, metadata_dict, concepts_dict):
@@ -357,7 +397,7 @@ def update_intro_language(selected_country, selected_language, intro_markdown, m
      return gr.Markdown(INTRO_TEXT)
 
 
- def handle_click_example(user_examples, concepts_dict):
+ def handle_click_example(user_examples, vlm_captions, concepts_dict):
      # print("handle_click_example")
      # print(user_examples)
      # ex = [item for item in user_examples]
@@ -365,7 +405,6 @@ def handle_click_example(user_examples, concepts_dict):
      # 1) Turn the flat string in slot 7 back into a list-of-lists
      ex = list(user_examples)
      raw_ac = ex[7] if len(ex) > 7 else ""
-
      country_btn = ex[3]
      language_btn = ex[4]
      concepts = concepts_dict[country_btn][language_btn]
@@ -441,7 +480,13 @@
      # dropdown_values.append(None)
 
      # Need to return values for each category dropdown
-     return [image_inp, image_url_inp, long_caption_inp, exampleid_btn, category_btn, concept_btn] + additional_concepts_by_category + [True]
+
+     vlm_caption = None
+     if vlm_captions:
+         if exampleid_btn in vlm_captions:
+             vlm_caption = vlm_captions[exampleid_btn]
+
+     return [image_inp, image_url_inp, long_caption_inp, exampleid_btn, category_btn, concept_btn] + additional_concepts_by_category + [True] + [vlm_caption] # loading_example flag + vlm_caption
      # return [
      #     image_inp,
      #     image_url_inp,
@@ -535,8 +580,8 @@ def blur_selected_faces(image, blur_faces_ids, faces_info, face_img, faces_count
      parsed_faces_ids = [f"face_{val.split(':')[-1].strip()}" for val in parsed_faces_ids]
 
      # Base blur amount and bounds
-     MIN_BLUR = 31   # Minimum blur amount (must be odd)
-     MAX_BLUR = 131  # Maximum blur amount (must be odd)
+     MIN_BLUR = 131  # Minimum blur amount (must be odd)
+     MAX_BLUR = 351  # Maximum blur amount (must be odd)
 
      blurring_start = time.time()
      # Process each face
@@ -688,4 +733,28 @@ def check_exclude_fn(image):
 
  def has_user_json(username, country, language_choice, local_ds_directory_path):
      """Check if JSON files exist for username pattern."""
-     return bool(glob.glob(os.path.join(local_ds_directory_path, "logged_in_users", country, language_choice, username, "**", "*.json"), recursive=True))
+     return bool(glob.glob(os.path.join(local_ds_directory_path, "logged_in_users", country, language_choice, username, "**", "*.json"), recursive=True))
+
+ def submit_button_clicked(vlm_output):
+
+     if vlm_output is None or vlm_output == '':
+         return Modal(visible=True), Modal(visible=False)
+     else:
+         return Modal(visible=False), Modal(visible=True)
+ # def submit_button_clicked(vlm_output, save_fn, data_outputs):
+ #     if vlm_output is None:
+ #         return Modal(visible=True)
+ #     else:
+ #         try:
+ #             save_fn(list(data_outputs.values()))
+ #         except Exception as e:
+ #             gr.Error(f"⚠️ Error saving data: {e}")
+
+ #     try:
+ #         image_inp, image_url_inp, long_caption_inp, vlm_output, vlm_feedback, exampleid_btn, category_btn, concept_btn, \
+ #         category_concept_dropdowns0, category_concept_dropdowns1, category_concept_dropdowns2, category_concept_dropdowns3, \
+ #         category_concept_dropdowns4 = clear_data("submit")
+ #     except Exception as e:
+ #         gr.Error(f"⚠️ Error clearing data: {e}")
+
+ #     return Modal(visible=False)
logic/supabase_client.py ADDED
@@ -0,0 +1,193 @@
+ import gradio as gr
+ from supabase import create_client, Client
+ import os
+ from config.settings import SUPABASE_URL, SUPABASE_KEY, REDIRECT_TO_URL
+ import traceback
+ from supabase.lib.client_options import ClientOptions
+
+
+ # --- Supabase Authentication Class ---
+
+ class SupabaseAuth:
+     """A class to handle Supabase authentication logic."""
+     def __init__(self, url: str, key: str):
+         self.url = url
+         self.key = key
+         try:
+             self.client: Client = create_client(url, key)
+         except Exception as e:
+             print(f"Error creating Supabase client: {e}")
+             self.client = None
+
+     def login(self, email: str, password: str):
+         """
+         Attempts to log in a user and returns a user-specific client.
+         """
+         if not self.client:
+             return {'success': False, 'data': None, 'message': "Supabase client not initialized."}
+         try:
+             response = self.client.auth.sign_in_with_password({"email": email, "password": password})
+             user_session = response.session
+
+             # Create a new, authenticated client for this user
+             authenticated_client = create_client(
+                 self.url,
+                 self.key,
+                 # options={"headers": {"Authorization": f"Bearer {user_session.access_token}"}}
+                 options=ClientOptions(
+                     headers={"Authorization": f"Bearer {user_session.access_token}"},
+                 )
+             )
+             authenticated_client.auth.set_session(user_session.access_token, user_session.refresh_token)
+
+             session_data = {
+                 "refresh_token": user_session.refresh_token,
+                 "user_email": user_session.user.email,
+                 "client": authenticated_client
+             }
+             return {'success': True, 'data': session_data, 'message': f"Welcome, {user_session.user.email}!"}
+         except Exception as e:
+             # print(f"Error logging in: {e}")
+             # traceback.print_exc()
+             # Handle specific error messages for better user feedback
+             return {'success': False, 'data': None, 'message': f"Login failed: {e}"}
+
+     def sign_up(self, email: str, password: str):
+         """Signs up a new user."""
+         if not self.client:
+             return {'success': False, 'message': "Supabase client not initialized."}
+         try:
+             # Supabase sign_up returns a session if email confirmation is disabled,
+             # or just a user object if it's enabled. We'll just return a success message.
+             self.client.auth.sign_up({
+                 "email": email,
+                 "password": password,
+             })
+             return {'success': True, 'message': 'Sign up successful! You can login now.'}
+         except Exception as e:
+             return {'success': False, 'message': f"Sign up failed: {e}"}
+
+     def restore_session(self, refresh_token: str):
+         """
+         Attempts to restore a session using a refresh token.
+         """
+         if not self.client:
+             return {'success': False, 'data': None, 'message': "Supabase client not initialized."}
+         try:
+             response = self.client.auth.refresh_session(refresh_token)
+             user_session = response.session
+
+             authenticated_client = create_client(
+                 self.url,
+                 self.key,
+                 options=ClientOptions(
+                     headers={"Authorization": f"Bearer {user_session.access_token}"},
+                 )
+             )
+             authenticated_client.auth.set_session(user_session.access_token, user_session.refresh_token)
+
+             session_data = {
+                 "refresh_token": user_session.refresh_token,
+                 "user_email": user_session.user.email,
+                 "client": authenticated_client
+             }
+             print("Session restored successfully:", session_data)
+             return {'success': True, 'data': session_data, 'message': f"Welcome, {user_session.user.email}!"}
+         except Exception as e:
+             print("failed to restore session:", e)
+             return {'success': False, 'data': None, 'message': f"Failed to restore session: {e}"}
+
+     def logout(self, user_client: Client):
+         """Signs out the user from Supabase, invalidating the token."""
+         if not user_client:
+             return {'success': False, 'message': 'No user client provided to log out.'}
+         try:
+             user_client.auth.sign_out()
+             return {'success': True, 'message': 'Successfully signed out from Supabase.'}
+         except Exception as e:
+             # It's often safe to ignore errors here (e.g., if token already expired)
+             # but we'll log it for debugging.
+             print(f"Error signing out from Supabase: {e}")
+             return {'success': False, 'message': f'Error signing out: {e}'}
+
+     def change_password(self, user_client: Client, new_password: str):
+         """Changes the user's password."""
+         if not user_client:
+             return {'success': False, 'message': 'No user client provided to change password.'}
+         try:
+             user_client.auth.update_user({"password": new_password})
+             return {'success': True, 'message': 'Password changed successfully.'}
+         except Exception as e:
+             return {'success': False, 'message': f'Error changing password: {e}'}
+
+     def is_logged_in(self, user_client: Client):
+         """Checks if a user is currently authenticated and returns their email."""
+         print("Checking if user is logged in...", user_client)
+         if not user_client:
+             return {'success': False, 'email': None, 'message': 'No user client provided.'}
+         try:
+             user_response = user_client.auth.get_user()
+             user = user_response.user
+             if user:
+                 return {'success': True, 'email': user.email, 'message': f'Logged in as: {user.email}'}
+             else:
+                 return {'success': False, 'email': None, 'message': 'User is not logged in.'}
+         except Exception as e:
+             # This might happen if the token has expired and can't be refreshed.
+             return {'success': False, 'email': None, 'message': f'Authentication check failed: {e}'}
+
+     def reset_password_for_email(self, email: str):
+         """
+         Sends a password reset email to the specified address.
+         """
+         if not self.client:
+             return {'success': False, 'message': "Supabase client not initialized."}
+         try:
+             self.client.auth.reset_password_for_email(
+                 email,
+                 {
+                     "redirect_to": str(REDIRECT_TO_URL),
+                 }
+             )
+             return {'success': True, 'message': "Password reset email sent. Check your inbox!"}
+         except Exception as e:
+             return {'success': False, 'message': f"Failed to send reset email: {e}"}
+
+     def retrieve_session_from_tokens(self, access_token: str, refresh_token: str):
+         """
+         Retrieves a session from an access token and refresh token.
+         This is typically used after a password recovery link is clicked.
+         """
+         if not self.client:
+             return {'success': False, 'data': None, 'message': "Supabase client not initialized."}
+         try:
+             # Set the session on the main client to verify tokens and get user info
+             self.client.auth.set_session(access_token, refresh_token)
+             user_response = self.client.auth.get_user()
+             user = user_response.user
+
+             if not user:
+                 return {'success': False, 'data': None, 'message': "Could not retrieve user from tokens."}
+
+             # Create a new, authenticated client for this user, similar to login
+             authenticated_client = create_client(
+                 self.url,
+                 self.key,
+                 options=ClientOptions(
+                     headers={"Authorization": f"Bearer {access_token}"},
+                 )
+             )
+             authenticated_client.auth.set_session(access_token, refresh_token)
+
+             session_data = {
+                 "refresh_token": refresh_token,
+                 "user_email": user.email,
+                 "client": authenticated_client
+             }
+             return {'success': True, 'data': session_data, 'message': f"Welcome, {user.email}!"}
+         except Exception as e:
+             return {'success': False, 'data': None, 'message': f"Failed to retrieve session from tokens: {e}"}
+
+
+ auth_handler = SupabaseAuth(SUPABASE_URL, SUPABASE_KEY)
+
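A short usage sketch of the new auth handler, mirroring how logic/data_utils.py and logic/handlers.py call it; the e-mail and password are placeholders:

from logic.supabase_client import auth_handler

result = auth_handler.login("user@example.com", "a-placeholder-password")
if result['success']:
    client = result['data']['client']          # per-user Supabase client, kept in Gradio state
    print(auth_handler.is_logged_in(client))   # {'success': True, 'email': 'user@example.com', ...}
    auth_handler.logout(client)
else:
    print(result['message'])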
logic/vlm.py ADDED
@@ -0,0 +1,440 @@
+ import torch, torchvision.transforms as T
+ from torchvision.transforms.functional import InterpolationMode
+ from PIL import Image
+ from transformers import TorchAoConfig, Qwen2_5_VLForConditionalGeneration, Gemma3ForConditionalGeneration, AutoTokenizer, AutoProcessor, AutoModelForVision2Seq, AutoModel
+ from qwen_vl_utils import process_vision_info
+ import gc
+ # from transformers.image_utils import load_image
+
+ IMAGENET_MEAN = (0.485, 0.456, 0.406)
+ IMAGENET_STD = (0.229, 0.224, 0.225)
+
+ class VLMManager:
+     """
+     A manager class for Vision-Language Models that handles model loading,
+     caching, and dynamic switching between different models.
+     """
+
+     def __init__(self, default_model: str = "Gemma3-4B"):
+         """
+         Initialize the VLM Manager with a default model.
+
+         Args:
+             default_model (str): The default model to load initially.
+         """
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.current_model_name = None
+         self.processor = None
+         self.tokenizer = None  # Initialize tokenizer attribute
+         self.model = None
+
+         self.system_message = """
+         You are an expert cultural-aware image-analysis assistant. For every image:
+         1. Output exactly 40 words in total.
+         2. Use a single paragraph (no lists or bullet points).
+         3. Describe Who (appearance/emotion), What (action), and Where (setting).
+         4. Do NOT include opinions or speculations.
+         5. If you go over 40 words, shorten or remove non-essential details.
+         """
+
+         self.user_prompt = """
+         Given this image, please provide an image description of around 40 words with extensive and detailed visual information.
+
+         Descriptions must be objective: focus on how you would describe the image to someone who can't see it, without your own opinions/speculations.
+
+         The text needs to include the main concept and describe the content of the image in detail by including:
+         - Who?: The visual appearance and observable emotions (e.g., "is smiling") of persons and animals.
+         - What?: The actions performed in the image.
+         - Where?: The setting of the image, including the size, color, and relationships between objects.
+         """
+
+         # Load the default model
+         self.load_model(default_model)
+
+     def load_model(self, model_name: str):
+         """
+         Load a VLM model. If the model is already loaded, return the cached version.
+
+         Args:
+             model_name (str): The name of the model to load.
+         """
+         # If the requested model is already loaded, no need to reload
+         if self.current_model_name == model_name and self.model is not None:
+             print(f"Model {model_name} is already loaded, using cached version.")
+             if self.current_model_name == "InternVL3_5-8B":
+                 return self.tokenizer, self.model
+             else:
+                 return self.processor, self.model
+
+         print(f"Loading model: {model_name}")
+
+         # Clear current model from memory if exists
+         if self.model is not None:
+             del self.model
+             self.model = None
+             if self.current_model_name == "InternVL3_5-8B":
+                 if hasattr(self, 'tokenizer') and self.tokenizer is not None:
+                     del self.tokenizer
+                     self.tokenizer = None
+             else:
+                 if hasattr(self, 'processor') and self.processor is not None:
+                     del self.processor
+                     self.processor = None
+             # Force garbage collection and clear CUDA cache
+             gc.collect()
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+                 torch.cuda.synchronize()  # Wait for all operations to complete
+
+         # Load the new model
+         if model_name == "SmolVLM-500M":
+             self.processor, self.model = self._load_smolvlm_model("HuggingFaceTB/SmolVLM-500M-Instruct")
+         elif model_name == "Qwen2.5-VL-7B":
+             self.processor, self.model = self._load_qwen25_model("Qwen/Qwen2.5-VL-7B-Instruct")
+         elif model_name == "InternVL3_5-8B":
+             self.tokenizer, self.model = self._load_internvl35_model("OpenGVLab/InternVL3_5-8B-Instruct")
+         elif model_name == "Gemma3-4B":
+             self.processor, self.model = self._load_gemma3_model("google/gemma-3-4b-it")
+         else:
+             raise ValueError(f"Model {model_name} is not supported or not available.")
+
+         self.current_model_name = model_name
+         print(f"Successfully loaded model: {model_name}")
+
+     def generate_caption(self, image):
+         """
+         Generate a caption for the given image using the loaded model.
+
+         Args:
+             image: The image to generate a caption for.
+         """
+         if self.current_model_name == "SmolVLM-500M":
+             return self._inference_smolvlm_model(image)
+         elif self.current_model_name == "Qwen2.5-VL-7B":
+             return self._inference_qwen25_model(image)
+         elif self.current_model_name == "InternVL3_5-8B":
+             return self._inference_internvl35_model(image)
+         elif self.current_model_name == "Gemma3-4B":
+             return self._inference_gemma3_model(image)
+         else:
+             raise ValueError(f"Model {self.current_model_name} is not supported or not available.")
+
+     def get_current_model(self):
+         """
+         Get the currently loaded model and processor.
+
+         Returns:
+             tuple: A tuple containing (processor, model, model_name).
+         """
+         return self.processor, self.model, self.current_model_name
+
+     def cleanup_memory(self):
+         """
+         Explicit memory cleanup method that can be called to free GPU memory.
+         """
+         if self.model is not None:
+             del self.model
+             self.model = None
+         if hasattr(self, 'processor') and self.processor is not None:
+             del self.processor
+             self.processor = None
+         if hasattr(self, 'tokenizer') and self.tokenizer is not None:
+             del self.tokenizer
+             self.tokenizer = None
+
+         self.current_model_name = None
+
+         # Force cleanup
+         gc.collect()
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+             torch.cuda.synchronize()
+
+         print("Memory cleanup completed.")
+
+     #########################################################
+     ## Load functions
+
+     def _load_smolvlm_model(self, model_name):
+         """Load SmolVLM model."""
+         processor = AutoProcessor.from_pretrained(model_name)
+         model = AutoModelForVision2Seq.from_pretrained(
+             model_name,
+             _attn_implementation="eager"
+         ).to(self.device)
+         model.eval()
+         return processor, model
+
+     def _load_qwen25_model(self, model_name):
+         """Load Qwen2.5-VL model."""
+         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+             model_name, torch_dtype="auto", device_map="auto"
+         )
+
+         # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
+         # model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+         #     "Qwen/Qwen2.5-VL-7B-Instruct",
+         #     torch_dtype=torch.bfloat16,
+         #     attn_implementation="flash_attention_2",
+         #     device_map="auto",
+         # )
+
+         processor = AutoProcessor.from_pretrained(model_name)
+         model.eval()
+         return processor, model
+
+     def _load_internvl35_model(self, model_name):
+         """Load InternVL3.5 model."""
+         # Load tokenizer (InternVL uses tokenizer instead of processor for text)
+         tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+         # Load the model using AutoModel
+         model = AutoModel.from_pretrained(
+             model_name,
+             torch_dtype=torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16,
+             low_cpu_mem_usage=True,
+             use_flash_attn=False,  # set to False if there is a CUDA mismatch
+             trust_remote_code=True,
+             device_map="auto"
+         )
+
+         model.eval()
+
+         # Return tokenizer as processor for consistency with the interface
+         return tokenizer, model
+
+     def _load_gemma3_model(self, model_name):
+         """Load Gemma3 model."""
+         quantization_config = TorchAoConfig("int4_weight_only", group_size=128)
+         model = Gemma3ForConditionalGeneration.from_pretrained(
+             model_name,
+             device_map="auto",
+             quantization_config=quantization_config
+         )
+         processor = AutoProcessor.from_pretrained(model_name)
+         model.eval()
+         return processor, model
+
+     #########################################################
+     ## Inference functions
+     def check_processor_and_model(self):
+         if self.processor is None or self.model is None:
+             raise ValueError("Processor and model must be loaded before generating a caption.")
+
+     def _inference_qwen25_model(self, image):
+         """Inference Qwen2.5-VL model."""
+         self.check_processor_and_model()
+         messages = [
+             {
+                 "role": "system",
+                 "content": [{"type": "text", "text": self.system_message}]
+             },
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "image",
+                         "image": Image.fromarray(image),
+                     },
+                     {"type": "text", "text": self.user_prompt},
+                 ],
+             }
+         ]
+
+         # Preparation for inference
+         text = self.processor.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         image_inputs, video_inputs = process_vision_info(messages)
+         inputs = self.processor(
+             text=[text],
+             images=image_inputs,
+             videos=video_inputs,
+             padding=True,
+             return_tensors="pt",
+         )
+         inputs = inputs.to(self.model.device)
+
+         # Inference: Generation of the output
+         generated_ids = self.model.generate(**inputs, max_new_tokens=128)
+         generated_ids_trimmed = [
+             out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+         ]
+         caption = self.processor.batch_decode(
+             generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+         )[0]
+
+         # Clean up tensors to free GPU memory
+         del inputs, generated_ids, generated_ids_trimmed
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+
+         return caption
+
+     def _inference_gemma3_model(self, image):
+         """Inference Gemma3 model."""
+         self.check_processor_and_model()
+         messages = [
+             {
+                 "role": "system",
+                 "content": [{"type": "text", "text": self.system_message}]
+             },
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "image", "image": Image.fromarray(image)},
+                     {"type": "text", "text": self.user_prompt}
+                 ]
+             }
+         ]
+
+         inputs = self.processor.apply_chat_template(
+             messages, add_generation_prompt=True, tokenize=True,
+             return_dict=True, return_tensors="pt"
+         ).to(self.model.device, dtype=torch.bfloat16)
+
+         input_len = inputs["input_ids"].shape[-1]
+
+         with torch.inference_mode():
+             generation = self.model.generate(**inputs, max_new_tokens=100, do_sample=False)
+             generation = generation[0][input_len:]
+
+         caption = self.processor.decode(generation, skip_special_tokens=True)
+
+         # Clean up tensors to free GPU memory
+         del inputs, generation
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+
+         return caption
+
+     def _inference_smolvlm_model(self, image):
+         self.check_processor_and_model()
+         messages = [
+             {
+                 "role": "system",
+                 "content": self.system_message
+             },
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "image"},
+                     {"type": "text", "text": self.user_prompt}
+                 ]
+             }
+         ]
+
+         # Prepare inputs
+         prompt = self.processor.apply_chat_template(messages, add_generation_prompt=True)
+         inputs = self.processor(text=prompt, images=[image], return_tensors="pt")
+         inputs = inputs.to(self.model.device)
+
+         # Generate outputs
+         gen_kwargs = {
+             "max_new_tokens": 200,  # plenty for ~40 words
+             # "early_stopping": True,        # stop at first EOS
+             # "no_repeat_ngram_size": 3,     # discourage loops
+             # "length_penalty": 0.8,         # slightly favor brevity
+             # "eos_token_id": processor.tokenizer.eos_token_id,
+             # "pad_token_id": processor.tokenizer.eos_token_id,
+         }
+         generated_ids = self.model.generate(**inputs, **gen_kwargs)  # max_new_tokens=500)
+         generated_texts = self.processor.batch_decode(
+             generated_ids,
+             skip_special_tokens=True,
+         )[0]
+
+         # Extract only what the assistant said
+         if "Assistant:" in generated_texts:
+             caption = generated_texts.split("Assistant:", 1)[1].strip()
+         else:
+             caption = generated_texts.strip()
+
+         # Clean up tensors to free GPU memory
+         del inputs, generated_ids
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+
+         return caption
+
+     def _inference_internvl35_model(self, image):
+         if self.tokenizer is None:
+             raise ValueError("Tokenizer must be loaded before generating a caption for InternVL3.5.")
+         # image can be numpy (H,W,3) or PIL.Image
+         if hasattr(image, "shape"):  # numpy array
+             pil_image = Image.fromarray(image.astype("uint8"), mode="RGB")
+         else:
+             pil_image = image
+
+         pixel_values = self._image_to_pixel_values(pil_image, size=448, max_num=12)
+         pixel_values = pixel_values.to(dtype=torch.bfloat16, device=self.model.device)
+
+         # Format question with image token (matches official docs)
+         question = "<image>\n" + self.user_prompt
+
+         # Generation config matching official examples
+         gen_cfg = dict(
+             max_new_tokens=128,
+             do_sample=False,
+             temperature=0.0,
+             # Optional: add other parameters from docs
+             # top_p=0.9,
+             # repetition_penalty=1.1
+         )
+
+         # Use model's chat method (official approach)
+         response = self.model.chat(self.tokenizer, pixel_values, question, gen_cfg)
+
+         # Clean up tensors to free GPU memory
+         del pixel_values
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+
+         return response.strip()
+
+     def _image_to_pixel_values(self, img, size=448, max_num=12):
+         transform = self._build_transform(size)
+         tiles = self._dynamic_preprocess(img, image_size=size, max_num=max_num, use_thumbnail=True)
+         pixel_values = torch.stack([transform(t) for t in tiles])
+         return pixel_values
+
+     def _dynamic_preprocess(self, image, min_num=1, max_num=12, image_size=448, use_thumbnail=True):
+         # same logic as the model card: split into tiles based on aspect ratio
+         w, h = image.size
+         aspect = w / h
+         targets = sorted({(i, j) for n in range(min_num, max_num+1)
+                           for i in range(1, n+1) for j in range(1, n+1)
+                           if i*j <= max_num and i*j >= min_num},
+                          key=lambda x: x[0]*x[1])
+
+         # pick closest ratio
+         best = min(targets, key=lambda r: abs(aspect - r[0]/r[1]))
+         tw, th = image_size * best[0], image_size * best[1]
+         resized = image.resize((tw, th))
+
+         tiles = []
+         for i in range(best[0] * best[1]):
+             box = ((i % (tw // image_size)) * image_size,
+                    (i // (tw // image_size)) * image_size,
+                    ((i % (tw // image_size)) + 1) * image_size,
+                    ((i // (tw // image_size)) + 1) * image_size)
+             tiles.append(resized.crop(box))
+
+         if use_thumbnail and len(tiles) != 1:
+             tiles.append(image.resize((image_size, image_size)))
+         return tiles
+
+     def _build_transform(self, input_size=448):
+         return T.Compose([
+             T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
+             T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
+             T.ToTensor(),
+             T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
+         ])
+
+
+ # Global VLM Manager instance
+ vlm_manager = VLMManager()
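A usage sketch of the manager as generate_vlm_caption in logic/handlers.py drives it; the zero-filled array stands in for the photo a contributor uploads:

import numpy as np
from logic.vlm import vlm_manager   # note: importing this module eagerly loads the default Gemma3-4B model

image = np.zeros((448, 448, 3), dtype=np.uint8)  # placeholder for the uploaded image array
try:
    vlm_manager.load_model("SmolVLM-500M")        # switch to the lighter model used by the handler
    caption = vlm_manager.generate_caption(image)
    print(caption)
finally:
    vlm_manager.cleanup_memory()                  # the handler frees GPU memory after every call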
requirements.txt CHANGED
@@ -1,4 +1,5 @@
  absl-py==2.2.2
+ accelerate==1.9.0
  aiofiles==23.2.1
  aiohappyeyeballs==2.6.1
  aiohttp==3.11.16
@@ -8,15 +9,25 @@ anyio==4.9.0
  astunparse==1.6.3
  async-timeout==5.0.1
  attrs==25.3.0
+ av==15.1.0
  bcrypt==4.3.0
  beautifulsoup4==4.13.3
+ bitsandbytes==0.46.1
+ blis==1.3.0
+ catalogue==2.0.10
  certifi==2025.1.31
  charset-normalizer==3.4.1
  click==8.1.8
+ cloudpathlib==0.21.1
+ confection==0.1.5
  cycler==0.12.1
+ cymem==2.0.11
  datasets==3.5.0
+ decord==0.6.0
  deep-translator==1.11.4
+ deplacy==2.1.0
  dill==0.3.8
+ einops==0.8.1
  et_xmlfile==2.0.0
  exceptiongroup==1.2.2
  fastapi==0.115.12
@@ -41,16 +52,36 @@ huggingface-hub==0.30.1
  idna==3.10
  Jinja2==3.1.6
  keras==3.9.2
+ langcodes==3.5.0
+ language_data==1.3.0
  libclang==18.1.1
+ marisa-trie==1.2.1
  Markdown==3.8
  markdown-it-py==3.0.0
  MarkupSafe==3.0.2
  mdurl==0.1.2
  ml_dtypes==0.5.1
+ mpmath==1.3.0
  multidict==6.3.2
  multiprocess==0.70.16
+ murmurhash==1.0.13
  namex==0.0.8
+ networkx==3.4.2
  numpy==2.1.3
+ nvidia-cublas-cu12==12.6.4.1
+ nvidia-cuda-cupti-cu12==12.6.80
+ nvidia-cuda-nvrtc-cu12==12.6.77
+ nvidia-cuda-runtime-cu12==12.6.77
+ nvidia-cudnn-cu12==9.5.1.17
+ nvidia-cufft-cu12==11.3.0.4
+ nvidia-cufile-cu12==1.11.1.6
+ nvidia-curand-cu12==10.3.7.77
+ nvidia-cusolver-cu12==11.7.1.2
+ nvidia-cusparse-cu12==12.5.4.2
+ nvidia-cusparselt-cu12==0.6.3
+ nvidia-nccl-cu12==2.26.2
+ nvidia-nvjitlink-cu12==12.6.85
+ nvidia-nvtx-cu12==12.6.77
  opencv-python==4.11.0.86
  openpyxl==3.1.5
  opt_einsum==3.4.0
@@ -59,54 +90,78 @@ orjson==3.10.16
  packaging==24.2
  pandas==2.2.3
  pillow==11.1.0
+ pillow_heif==1.0.0
+ preshed==3.0.10
  propcache==0.3.1
  protobuf==5.29.4
+ psutil==7.0.0
  pyarrow==19.0.1
- pydantic==2.11.2
- pydantic_core==2.33.1
+ pydantic
+ pydantic_core
  pydub==0.25.1
  Pygments==2.19.1
  PySocks==1.7.1
+ pythainlp==5.1.2
  python-dateutil==2.9.0.post0
  python-dotenv==1.1.0
  python-multipart==0.0.20
  pytz==2025.2
+ pyuca==1.2
  PyYAML==6.0.2
+ qwen-vl-utils==0.0.8
+ regex==2024.11.6
  requests==2.32.3
  retina-face==0.0.17
  rich==14.0.0
  ruff==0.11.4
  safehttpx==0.1.6
+ safetensors==0.5.3
  semantic-version==2.10.0
  shellingham==1.5.4
  six==1.17.0
+ smart_open==7.3.0.post1
  sniffio==1.3.1
  soupsieve==2.6
+ spacy==3.8.7
+ spacy-legacy==3.0.12
+ spacy-loggers==1.0.5
+ spacy-thai==0.7.8
+ spacy-udpipe==1.0.0
+ srsly==2.5.1
  starlette==0.46.1
+ sympy==1.14.0
  tensorboard==2.19.0
  tensorboard-data-server==0.7.2
  tensorflow==2.19.0
  tensorflow-io-gcs-filesystem==0.37.1
  termcolor==3.0.1
  tf_keras==2.19.0
+ thinc==8.3.6
+ timm==1.0.19
+ tokenizers==0.21.2
  tomlkit==0.13.2
+ torch==2.7.1
+ https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
+ torchao==0.13.0
+ torchvision==0.22.1
  tqdm==4.67.1
+ transformers==4.53.3
+ triton==3.3.1
  typer==0.15.2
- typing-inspection==0.4.0
- typing_extensions==4.12.2
+ typing-inspection
+ typing_extensions
  tzdata==2025.2
+ ufal.udpipe==1.3.1.1
  urllib3==2.3.0
  uvicorn==0.34.0
+ wasabi==1.1.3
+ weasel==0.4.1
  websockets==15.0.1
  Werkzeug==3.1.3
  wrapt==1.17.2
  xxhash==3.5.0
  yarl==1.19.0
- spacy_udpipe==1.0.0
- pyuca==1.2
- pillow_heif==1.0.0
- spacy==3.8.7
- spacy-legacy==3.0.12
- spacy-loggers==1.0.5
- spacy_thai==0.7.8
- spacy-udpipe==1.0.0
+ supabase==2.18.1
+ supabase_auth==2.12.3
+ supabase_functions==0.10.1
+ # flash_attn==2.8.1
ui/layout.py CHANGED
@@ -1,5 +1,6 @@
  import gradio as gr
  import time
 
  from logic.data_utils import CustomHFDatasetSaver
  from data.lang2eng_map import lang2eng_mapping
@@ -12,6 +13,161 @@ from .selection_page import build_selection_page
  from .main_page import build_main_page
  from .main_page import sort_with_pyuca
 
  def get_key_by_value(dictionary, value):
      for key, val in dictionary.items():
          if val == value:
@@ -100,14 +256,27 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
          object-fit: contain; /* make sure the full image shows */
          height: 460px; /* set a fixed height */
      }
      """
      ############################################################################
      with gr.Blocks(css=custom_css) as ui:
          local_storage = gr.State([None, None, "", ""])
          loading_example = gr.State(False) # to check if the values are loaded from a user click on an example in
          # First page: selection
 
-         selection_page, country_choice, language_choice, proceed_btn, username, password, intro_markdown = build_selection_page(metadata_dict)
 
          # Second page
          cmp_main_ui = build_main_page(concepts_dict, metadata_dict, local_storage)
@@ -144,8 +313,20 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
          modal_exclude_confirm = cmp_main_ui["modal_exclude_confirm"]
          cancel_exclude_btn = cmp_main_ui["cancel_exclude_btn"]
          confirm_exclude_btn = cmp_main_ui["confirm_exclude_btn"]
-
-
          ### Category button
          category_btn.change(
              fn=partial(load_concepts, concepts=concepts_dict),
@@ -214,7 +395,7 @@
          clear_btn.click(
              fn=clear_data,
              outputs=[
-                 image_inp, image_url_inp, long_caption_inp, exampleid_btn,
                  category_btn, concept_btn,
                  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
                  category_concept_dropdowns[3], category_concept_dropdowns[4]
@@ -280,12 +461,12 @@
          # Handle clicking on an example
          user_examples.click(
              fn=partial(handle_click_example, concepts_dict=concepts_dict),
-             inputs=[user_examples],
              outputs=[
                  image_inp, image_url_inp, long_caption_inp, exampleid_btn,
                  category_btn, concept_btn,
                  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
-                 category_concept_dropdowns[3], category_concept_dropdowns[4], loading_example
              ],
          )
@@ -295,6 +476,41 @@
 
          # ============================================ #
          # Submit Button Click events
 
          proceed_btn.click(
              fn=partial(switch_ui, flag=False),
@@ -313,8 +529,8 @@
              ]
          ).then(
              fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
-             inputs=[username_inp, password_inp, country_choice, language_choice],
-             outputs=[user_examples, loading_msg],
          )
 
 
@@ -322,7 +538,7 @@
          exit_btn.click(
              fn=exit,
              outputs=[
-                 image_inp, image_url_inp, long_caption_inp, user_examples, loading_msg,
                  username, password, local_storage, exampleid_btn, category_btn, concept_btn,
                  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
                  category_concept_dropdowns[3], category_concept_dropdowns[4]
@@ -368,7 +584,10 @@
              "excluded": gr.State(value=False),
              "concepts_dict": gr.State(value=concepts_dict),
              "country_lang_map": gr.State(value=lang2eng_mapping),
              # "is_blurred": is_blurred
          }
          # data_outputs = [image_inp, image_url_inp, long_caption_inp,
          #                 country_inp, language_inp, category_btn, concept_btn,
@@ -376,34 +595,56 @@
          hf_writer.setup(list(data_outputs.keys()), local_ds_folder = LOCAL_DS_DIRECTORY_PATH)
 
          # STEP 4: Chain save_data, then update_user_data, then re-enable button, hide modal, and clear
-         submit_btn.click(
-             hf_writer.save,
-             list(data_outputs.values()),
-             None,
-         ).success(
-             fn=partial(clear_data, "submit"),
-             outputs=[
-                 image_inp, image_url_inp, long_caption_inp, exampleid_btn,
-                 category_btn, concept_btn,
-                 category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
-                 category_concept_dropdowns[3], category_concept_dropdowns[4]
-             ],
-             # ).success(enable_submit,
-             #     None, [submit_btn]
-             # ).success(lambda: Modal(visible=False),
-             #     None, modal_saving
-             # ).success(lambda: Modal(visible=True),
-             #     None, modal_data_saved
-         ).success(
-             # set loading msg
-             lambda: gr.update(value="**Loading your data, please wait ...**"),
-             None, loading_msg
-         ).success(
-             fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
-             inputs=[username_inp, password_inp, country_choice, language_choice],
-             outputs=[user_examples, loading_msg]
-         )
-
          # ============================================ #
          # instructions button
          instruct_btn.click(lambda: Modal(visible=True), None, modal)
@@ -446,13 +687,13 @@
                  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
                  category_concept_dropdowns[3], category_concept_dropdowns[4],
                  timestamp_btn, username_inp, password_inp, exampleid_btn, gr.State(value=True),
-                 gr.State(value=concepts_dict), gr.State(value=lang2eng_mapping)
              ],
              outputs=None
          ).success(
              fn=partial(clear_data, "remove"),
-             outputs=[
-                 image_inp, image_url_inp, long_caption_inp, exampleid_btn,
                  category_btn, concept_btn,
                  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
                  category_concept_dropdowns[3], category_concept_dropdowns[4]
@@ -465,8 +706,32 @@
              outputs=loading_msg
          ).success(
              fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path=LOCAL_DS_DIRECTORY_PATH),
-             inputs=[username_inp, password_inp, country_choice, language_choice],
-             outputs=[user_examples, loading_msg]
          )
 
-     return ui
 
  import gradio as gr
  import time
+ from logic.supabase_client import auth_handler
 
  from logic.data_utils import CustomHFDatasetSaver
  from data.lang2eng_map import lang2eng_mapping
 
  from .main_page import build_main_page
  from .main_page import sort_with_pyuca
 
+ js_code = """
+ function() {
+     // Get the full URL with the fragment
+     const url = window.location.href;
+     const fragment = url.split('#')[1];
21
+
22
+ if (!fragment) {
23
+ return "";
24
+ }
25
+
26
+ // Parse the fragment into an object
27
+ const params = new URLSearchParams(fragment);
28
+ const access_token = params.get('access_token');
29
+ const refresh_token = params.get('refresh_token');
30
+
31
+ // Create a JSON string with the tokens
32
+ const tokens = JSON.stringify({
33
+ access_token: access_token,
34
+ refresh_token: refresh_token
35
+ });
36
+
37
+ // Return the JSON string to the Gradio output component
38
+ return tokens;
39
+ }
40
+ """
41
+
42
+ def login_user(email, password):
43
+ result = auth_handler.login(email, password)
44
+ if result['success']:
45
+ session_data = result['data']
46
+ persistent_data = {
47
+ "refresh_token": session_data['refresh_token'],
48
+ "user_email": session_data['user_email']
49
+ }
50
+ return session_data['client'], persistent_data, result['message']
51
+ else:
52
+ persistent_data = {
53
+ "refresh_token": "",
54
+ "user_email": ""
55
+ }
56
+ return None, persistent_data, result['message']
57
+
58
+ def login_user_recovery(session_data: str):
59
+ """
60
+ This function receives session data (tokens as a JSON string) from the frontend,
61
+ retrieves the session, and returns data in a format similar to login_user.
62
+ """
63
+ try:
64
+ import json
65
+ tokens = json.loads(session_data)
66
+ access_token = tokens.get("access_token")
67
+ refresh_token = tokens.get("refresh_token")
68
+
69
+ if not access_token or not refresh_token:
70
+ return None, gr.skip(), "Invalid session data provided."
71
+
72
+ result = auth_handler.retrieve_session_from_tokens(access_token, refresh_token)
73
+
74
+ if result['success']:
75
+ session_data_result = result['data']
76
+ persistent_data = {
77
+ "refresh_token": session_data_result['refresh_token'],
78
+ "user_email": session_data_result['user_email']
79
+ }
80
+ return session_data_result['client'], persistent_data, result['message']
81
+ else:
82
+ persistent_data = {
83
+ "refresh_token": "",
84
+ "user_email": ""
85
+ }
86
+ return None, persistent_data, result['message']
87
+
88
+ except Exception as e:
89
+ return None, gr.skip(), f"Failed to process recovery login: {e}"
90
+
91
+ def sign_up(email, password):
92
+ result = auth_handler.sign_up(email, password)
93
+ return result['message']
94
+
95
+ def reset_password(email):
96
+ result = auth_handler.reset_password_for_email(email)
97
+ return result['message']
98
+
99
+ def log_out(supabase_user_client, persistent_session):
100
+ """
101
+ Logs out the user and clears the session. If an error occurs, it still returns an empty persistent session (logging the user out).
102
+ """
103
+ persistent_session = {
104
+ "refresh_token": "",
105
+ "user_email": ""
106
+ }
107
+ if supabase_user_client:
108
+ result = auth_handler.logout(supabase_user_client)
109
+ if result['success']:
110
+ print("User logged out successfully.")
111
+ return persistent_session
112
+ else:
113
+ print(f"Error logging out: {result['message']}")
114
+ return persistent_session
115
+ else:
116
+ print("No user client provided to log out.")
117
+ return persistent_session
118
+
119
+ def restore_user_session(session_data, login_status=None):
120
+ print("Restoring user session with data:", session_data)
121
+ # default values if the user is not logged in
122
+ # or the session data is not valid
123
+ login_status_update = gr.update(value= login_status if login_status else "")
124
+ proceed_button_update = gr.update(value="Proceed as Anonymous User", interactive=True)
125
+ login_button_update = gr.update(visible=True)
126
+ sign_up_button_update = gr.update(visible=True)
127
+ reset_password_button_update = gr.update(visible=True)
128
+ logout_button_update = gr.update(visible=False)
129
+ change_password_field_update = gr.update(visible=False)
130
+ change_password_field_confirm_update = gr.update(visible=False)
131
+ change_password_button_update = gr.update(visible=False)
132
+ change_password_status_update = gr.update(value="")
133
+ persistent_data = {
134
+ "refresh_token": "",
135
+ "user_email": ""
136
+ }
137
+ if not session_data or not session_data.get('refresh_token', ''):
138
+ print("No session data found, proceeding as anonymous user.")
139
+ return None, persistent_data, login_status_update, proceed_button_update, login_button_update, sign_up_button_update, reset_password_button_update, logout_button_update, change_password_field_update, change_password_field_confirm_update, change_password_button_update, change_password_status_update
140
+
141
+ result = auth_handler.restore_session(session_data['refresh_token'])
142
+ if result['success']:
143
+ restored_session = result['data']
144
+ new_persistent_data = {
145
+ "refresh_token": restored_session['refresh_token'],
146
+ "user_email": restored_session['user_email']
147
+ }
148
+ login_status_update = gr.update(value=result['message'])
149
+ proceed_button_update = gr.update(value="Proceed", interactive=True)
150
+ login_button_update = gr.update(visible=False)
151
+ sign_up_button_update = gr.update(visible=False)
152
+ reset_password_button_update = gr.update(visible=False)
153
+ logout_button_update = gr.update(visible=True)
154
+ change_password_field_update = gr.update(visible=True)
155
+ change_password_field_confirm_update = gr.update(visible=True)
156
+ change_password_button_update = gr.update(visible=True)
157
+ return restored_session['client'], new_persistent_data, login_status_update, proceed_button_update, login_button_update, sign_up_button_update, reset_password_button_update, logout_button_update, change_password_field_update, change_password_field_confirm_update, change_password_button_update, change_password_status_update
158
+ else:
159
+ return None, persistent_data, login_status_update, proceed_button_update, login_button_update, sign_up_button_update, reset_password_button_update, logout_button_update, change_password_field_update, change_password_field_confirm_update, change_password_button_update, change_password_status_update
160
+
161
+ def change_password(supabase_user_client, new_password, confirm_password):
162
+ """
163
+ Changes the user's password.
164
+ """
165
+ if new_password != confirm_password:
166
+ return "Passwords do not match. Please try again."
167
+ result = auth_handler.change_password(supabase_user_client, new_password)
168
+ return result['message']
169
+
170
+
171
  def get_key_by_value(dictionary, value):
172
  for key, val in dictionary.items():
173
  if val == value:
 
256
  object-fit: contain; /* make sure the full image shows */
257
  height: 460px; /* set a fixed height */
258
  }
259
+ #vlm_output .input-container {
260
+ position: relative;
261
+ }
262
+ #vlm_output .input-container::before {
263
+ content: "";
264
+ position: absolute;
265
+ top: 0; left: 0; right: 0; bottom: 0;
266
+ z-index: 10; /* sits above the textarea */
267
+ background: transparent;
268
+ }
269
  """
270
  ############################################################################
271
  with gr.Blocks(css=custom_css) as ui:
272
+ supabase_user_client = gr.State(None)
273
+ persistent_session = gr.BrowserState(None)
274
+
275
  local_storage = gr.State([None, None, "", ""])
276
  loading_example = gr.State(False) # to check if the values are loaded from a user click on an example in
277
  # First page: selection
278
 
279
+ selection_page, country_choice, language_choice, proceed_btn, username, password, intro_markdown, login_btn, sign_up_btn, reset_password_btn, login_status, logout_btn, change_password_field, change_password_field_confirm, change_password_btn, change_password_status = build_selection_page(metadata_dict)
280
 
281
  # Second page
282
  cmp_main_ui = build_main_page(concepts_dict, metadata_dict, local_storage)
 
313
  modal_exclude_confirm = cmp_main_ui["modal_exclude_confirm"]
314
  cancel_exclude_btn = cmp_main_ui["cancel_exclude_btn"]
315
  confirm_exclude_btn = cmp_main_ui["confirm_exclude_btn"]
316
+ vlm_output = cmp_main_ui["vlm_output"]
317
+ gen_button = cmp_main_ui["gen_button"]
318
+ vlm_feedback = cmp_main_ui["vlm_feedback"]
319
+ modal_vlm = cmp_main_ui["modal_vlm"]
320
+ vlm_no_btn = cmp_main_ui["vlm_no_btn"]
321
+ vlm_done_btn = cmp_main_ui["vlm_done_btn"]
322
+ submit_yes = cmp_main_ui["submit_yes"]
323
+ submit_no = cmp_main_ui["submit_no"]
324
+ modal_submit = cmp_main_ui["modal_submit"]
325
+ vlm_cancel_btn = cmp_main_ui["vlm_cancel_btn"]
326
+ vlm_model_dropdown = cmp_main_ui["vlm_model_dropdown"]
327
+
328
+ # dictionary to store all vlm_output by exampleid
329
+ vlm_captions = gr.State(None)
330
  ### Category button
331
  category_btn.change(
332
  fn=partial(load_concepts, concepts=concepts_dict),
 
395
  clear_btn.click(
396
  fn=clear_data,
397
  outputs=[
398
+ image_inp, image_url_inp, long_caption_inp, vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button, exampleid_btn,
399
  category_btn, concept_btn,
400
  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
401
  category_concept_dropdowns[3], category_concept_dropdowns[4]
 
461
  # Handle clicking on an example
462
  user_examples.click(
463
  fn=partial(handle_click_example, concepts_dict=concepts_dict),
464
+ inputs=[user_examples, vlm_captions],
465
  outputs=[
466
  image_inp, image_url_inp, long_caption_inp, exampleid_btn,
467
  category_btn, concept_btn,
468
  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
469
+ category_concept_dropdowns[3], category_concept_dropdowns[4], loading_example, vlm_output
470
  ],
471
  )
472
 
 
476
 
477
  # ============================================ #
478
  # Submit Button Click events
479
+ login_btn.click(
480
+ fn=login_user,
481
+ inputs=[username, password],
482
+ outputs=[supabase_user_client, persistent_session, login_status],
483
+ ).then(
484
+ fn=restore_user_session,
485
+ inputs=[persistent_session, login_status],
486
+ outputs=[supabase_user_client, persistent_session, login_status, proceed_btn, login_btn, sign_up_btn, reset_password_btn, logout_btn, change_password_field, change_password_field_confirm, change_password_btn, change_password_status],
487
+ )
488
+
489
+ sign_up_btn.click(
490
+ fn=sign_up,
491
+ inputs=[username, password],
492
+ outputs=[login_status],
493
+ )
494
+
495
+ logout_btn.click(
496
+ fn=log_out,
497
+ inputs=[supabase_user_client, persistent_session],
498
+ outputs=[persistent_session]
499
+ ).then(
500
+ fn=restore_user_session,
501
+ inputs=[persistent_session],
502
+ outputs=[supabase_user_client, persistent_session, login_status, proceed_btn, login_btn, sign_up_btn, reset_password_btn, logout_btn, change_password_field, change_password_field_confirm, change_password_btn, change_password_status],
503
+ )
504
+ change_password_btn.click(
505
+ fn=change_password,
506
+ inputs=[supabase_user_client, change_password_field, change_password_field_confirm],
507
+ outputs=[change_password_status]
508
+ )
509
+ reset_password_btn.click(
510
+ fn=reset_password,
511
+ inputs=[username],
512
+ outputs=[login_status]
513
+ )
514
 
515
  proceed_btn.click(
516
  fn=partial(switch_ui, flag=False),
 
529
  ]
530
  ).then(
531
  fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
532
+ inputs=[supabase_user_client, country_choice, language_choice],
533
+ outputs=[user_examples, loading_msg, vlm_captions],
534
  )
535
 
536
 
 
538
  exit_btn.click(
539
  fn=exit,
540
  outputs=[
541
+ image_inp, image_url_inp, long_caption_inp, vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button, user_examples, loading_msg,
542
  username, password, local_storage, exampleid_btn, category_btn, concept_btn,
543
  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
544
  category_concept_dropdowns[3], category_concept_dropdowns[4]
 
584
  "excluded": gr.State(value=False),
585
  "concepts_dict": gr.State(value=concepts_dict),
586
  "country_lang_map": gr.State(value=lang2eng_mapping),
587
+ "client": supabase_user_client,
588
  # "is_blurred": is_blurred
589
+ "vlm_caption": vlm_output,
590
+ "vlm_feedback": vlm_feedback
591
  }
592
  # data_outputs = [image_inp, image_url_inp, long_caption_inp,
593
  # country_inp, language_inp, category_btn, concept_btn,
 
595
  hf_writer.setup(list(data_outputs.keys()), local_ds_folder = LOCAL_DS_DIRECTORY_PATH)
596
 
597
  # STEP 4: Chain save_data, then update_user_data, then re-enable button, hide modal, and clear
598
+ # submit_btn.click(lambda: Modal(visible=True), None, modal_vlm)
599
+ submit_btn.click(submit_button_clicked,
600
+ inputs=[vlm_output],
601
+ outputs=[modal_vlm, modal_submit])
602
+
603
+ # submit_btn.click(partial(submit_button_clicked, save_fn=hf_writer.save,
604
+ # data_outputs=data_outputs),
605
+ # inputs=[vlm_output],
606
+ # outputs=[modal_vlm, image_inp, image_url_inp, long_caption_inp, vlm_output, vlm_feedback, exampleid_btn,
607
+ # category_btn, concept_btn, category_concept_dropdowns[0], category_concept_dropdowns[1],
608
+ # category_concept_dropdowns[2], category_concept_dropdowns[3], category_concept_dropdowns[4]])
609
+
610
+ def wire_submit_chain(button, modal_ui):
611
+ e = button.click(
612
+ fn=lambda: Modal(visible=False),
613
+ outputs=[modal_ui]
614
+ ).success(
615
+ hf_writer.save,
616
+ inputs = list(data_outputs.values()),
617
+ outputs = None,
618
+ ).success(
619
+ fn=partial(clear_data, "submit"),
620
+ outputs=[
621
+ image_inp, image_url_inp, long_caption_inp, vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button, exampleid_btn,
622
+ category_btn, concept_btn,
623
+ category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
624
+ category_concept_dropdowns[3], category_concept_dropdowns[4]
625
+ ],
626
+ # ).success(enable_submit,
627
+ # None, [submit_btn]
628
+ # ).success(lambda: Modal(visible=False),
629
+ # None, modal_saving
630
+ # ).success(lambda: Modal(visible=True),
631
+ # None, modal_data_saved
632
+ ).success(
633
+ # set loading msg
634
+ lambda: gr.update(value="**Loading your data, please wait ...**"),
635
+ None, loading_msg
636
+ ).success(
637
+ fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
638
+ inputs=[supabase_user_client, country_choice, language_choice],
639
+ outputs=[user_examples, loading_msg, vlm_captions]
640
+ )
641
+ return e
642
+
643
+ wire_submit_chain(vlm_done_btn, modal_vlm)
644
+ wire_submit_chain(vlm_no_btn, modal_vlm)
645
+ wire_submit_chain(submit_yes, modal_submit)
646
+ submit_no.click(lambda: Modal(visible=False), None, modal_submit)
647
+ vlm_cancel_btn.click(lambda: Modal(visible=False), None, modal_vlm)
648
  # ============================================ #
649
  # instructions button
650
  instruct_btn.click(lambda: Modal(visible=True), None, modal)
 
687
  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
688
  category_concept_dropdowns[3], category_concept_dropdowns[4],
689
  timestamp_btn, username_inp, password_inp, exampleid_btn, gr.State(value=True),
690
+ gr.State(value=concepts_dict), gr.State(value=lang2eng_mapping), vlm_output, vlm_feedback
691
  ],
692
  outputs=None
693
  ).success(
694
  fn=partial(clear_data, "remove"),
695
+ outputs=[
696
+ image_inp, image_url_inp, long_caption_inp, vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button, exampleid_btn,
697
  category_btn, concept_btn,
698
  category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
699
  category_concept_dropdowns[3], category_concept_dropdowns[4]
 
706
  outputs=loading_msg
707
  ).success(
708
  fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path=LOCAL_DS_DIRECTORY_PATH),
709
+ inputs=[supabase_user_client, country_choice, language_choice],
710
+ outputs=[user_examples, loading_msg, vlm_captions]
711
+ )
712
+ # ============================================= #
713
+ # VLM Gen button
714
+ # ============================================= #
715
+ gen_button.click(
716
+ fn=generate_vlm_caption, # processor=processor, model=model
717
+ inputs=[image_inp, vlm_model_dropdown],
718
+ outputs=[vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button]
719
+ )
720
+ # vlm_output.change(
721
+ # fn=lambda : gr.update(interactive=False) if vlm_output.value else gr.update(interactive=True),
722
+ # inputs=[],
723
+ # outputs=[gen_button]
724
+ # )
725
+
726
+ ui.load(
727
+ fn=login_user_recovery,
728
+ inputs=gr.Textbox(visible=False, value=""), # hidden textbox that receives the URL fragment tokens
729
+ outputs=[supabase_user_client, persistent_session, login_status],
730
+ js=js_code
731
+ ).then(
732
+ fn=restore_user_session,
733
+ inputs=[persistent_session],
734
+ outputs=[supabase_user_client, persistent_session, login_status, proceed_btn, login_btn, sign_up_btn, reset_password_btn, logout_btn, change_password_field, change_password_field_confirm, change_password_btn, change_password_status],
735
  )
736
+ return ui
737
 
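
Note: every auth helper added above (login_user, login_user_recovery, log_out, restore_user_session, change_password, sign_up, reset_password) delegates to logic.supabase_client.auth_handler, which is not part of this diff. The sketch below shows one way that module could be shaped so those call sites work; the class layout, the {'success', 'message', 'data'} result shape, and the use of SUPABASE_URL / SUPABASE_KEY from config.settings are inferred from how the results are consumed, not taken from the committed file.

# logic/supabase_client.py -- hypothetical sketch of the interface assumed by build_ui, not the committed code
from supabase import create_client
from config.settings import SUPABASE_URL, SUPABASE_KEY


class AuthHandler:
    """Every method returns {'success': bool, 'message': str, 'data': dict | None}."""

    def _session_payload(self, client):
        # Package what build_ui reads on success: the per-user client plus what is stored in gr.BrowserState.
        session = client.auth.get_session()
        return {
            "client": client,
            "refresh_token": session.refresh_token,
            "user_email": session.user.email,
        }

    def login(self, email, password):
        try:
            client = create_client(SUPABASE_URL, SUPABASE_KEY)
            client.auth.sign_in_with_password({"email": email, "password": password})
            return {"success": True, "message": "Logged in successfully.", "data": self._session_payload(client)}
        except Exception as e:
            return {"success": False, "message": f"Login failed: {e}", "data": None}

    def restore_session(self, refresh_token):
        try:
            client = create_client(SUPABASE_URL, SUPABASE_KEY)
            client.auth.refresh_session(refresh_token)
            return {"success": True, "message": "Session restored.", "data": self._session_payload(client)}
        except Exception as e:
            return {"success": False, "message": f"Could not restore session: {e}", "data": None}

    def logout(self, client):
        try:
            client.auth.sign_out()
            return {"success": True, "message": "Logged out.", "data": None}
        except Exception as e:
            return {"success": False, "message": f"Logout failed: {e}", "data": None}


auth_handler = AuthHandler()

The remaining methods used above (sign_up, retrieve_session_from_tokens, reset_password_for_email, change_password) would follow the same result shape.
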
 
ui/main_page.py CHANGED
@@ -107,7 +107,36 @@ def build_main_page(concepts_dict, metadata_dict, local_storage):
107
  long_caption_inp = gr.Textbox(lines=6, label="Description", elem_id="long_caption_inp")
108
  num_words_inp = gr.Textbox(lines=1, label="Number of words", elem_id="num_words", interactive=False, value=0)
109
  # num_words_inp = gr.Markdown("Number of words", elem_id="num_words")
 
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  categories_list = sort_with_pyuca(list(concepts_dict["USA"]["English"].keys()))
112
 
113
  def create_category_dropdown(category, index):
@@ -226,5 +255,16 @@ def build_main_page(concepts_dict, metadata_dict, local_storage):
226
  "modal_exclude_confirm": modal_exclude_confirm,
227
  "cancel_exclude_btn": cancel_exclude_btn,
228
  "confirm_exclude_btn": confirm_exclude_btn,
 
 
 
 
 
 
 
 
 
 
 
229
  }
230
  return output_dict
 
107
  long_caption_inp = gr.Textbox(lines=6, label="Description", elem_id="long_caption_inp")
108
  num_words_inp = gr.Textbox(lines=1, label="Number of words", elem_id="num_words", interactive=False, value=0)
109
  # num_words_inp = gr.Markdown("Number of words", elem_id="num_words")
110
+ #########################################################
111
+ with Modal(visible=False, allow_user_close=False) as modal_vlm:
112
 
113
+ question = gr.Markdown("Would you like to see if a VLM can generate a culturally aware description for your uploaded concept?")
114
+ with gr.Row():
115
+ gen_button = gr.Button("Yes", variant="primary", elem_id="generate_answer_btn")
116
+ vlm_no_btn = gr.Button("No")
117
+ vlm_cancel_btn = gr.Button("Cancel")
118
+ vlm_model_dropdown = gr.Dropdown(
119
+ ["SmolVLM-500M", "Qwen2.5-VL-7B", "InternVL3_5-8B", "Gemma3-4B"], value="Gemma3-4B", multiselect=False, label="VLM Model", info="Select the VLM model to use for generating the description."
120
+ )
121
+ vlm_output = gr.Textbox(lines=6, label="Generated description", elem_id="vlm_output", interactive=False)
122
+ vlm_feedback = gr.Radio(["Yes πŸ‘", "No πŸ‘Ž"], label="Do you think the generated description is accurate within the cultural context of your country?", visible=False, elem_id="vlm_feedback", interactive=True)
123
+ vlm_done_btn = gr.Button("Complete Submission", visible=False)
124
+
125
+ with Modal(visible=False, allow_user_close=False) as modal_submit:
126
+
127
+ gr.Markdown("⚠️ You've already generated a caption for this image. An optional description with the VLM can only be generated once. Would you like to proceed and submit your modified data?")
128
+ with gr.Row():
129
+ submit_yes = gr.Button("Yes", variant="primary", elem_id="submit_confirm_yes")
130
+ submit_no = gr.Button("No", variant="stop", elem_id="submit_confirm_no")
131
+
132
+ # with gr.Group():
133
+ # gr.Markdown("### VLM Generation (Optional)")
134
+ # with gr.Accordion("πŸ“˜ Click here if you want to get a generated answer from a small vlm", open=False):
135
+ # gen_button = gr.Button("Generate Answer", variant="primary", elem_id="generate_answer_btn")
136
+ # vlm_output = gr.Textbox(lines=6, label="Generated Answer", elem_id="vlm_output", interactive=False)
137
+ # vlm_feedback = gr.Radio(["Yes πŸ‘", "No πŸ‘Ž"], label="Do you like the generated caption?", visible=False, elem_id="vlm_feedback", interactive=True)
138
+ ##########################################################
139
+
140
  categories_list = sort_with_pyuca(list(concepts_dict["USA"]["English"].keys()))
141
 
142
  def create_category_dropdown(category, index):
 
255
  "modal_exclude_confirm": modal_exclude_confirm,
256
  "cancel_exclude_btn": cancel_exclude_btn,
257
  "confirm_exclude_btn": confirm_exclude_btn,
258
+ "vlm_output": vlm_output,
259
+ "gen_button": gen_button,
260
+ "vlm_feedback": vlm_feedback,
261
+ "modal_vlm": modal_vlm,
262
+ "vlm_no_btn": vlm_no_btn,
263
+ "vlm_done_btn": vlm_done_btn,
264
+ "submit_yes": submit_yes,
265
+ "submit_no": submit_no,
266
+ "modal_submit": modal_submit,
267
+ "vlm_cancel_btn": vlm_cancel_btn,
268
+ "vlm_model_dropdown": vlm_model_dropdown
269
  }
270
  return output_dict
ui/selection_page.py CHANGED
@@ -57,6 +57,24 @@ def build_selection_page(metadata_dict):
57
  username = gr.Textbox(label="Email (optional)", type="email", elem_id="username_text")
58
  password = gr.Textbox(label="Password (optional)", type="password", elem_id="password_text")
59
 
60
- proceed_btn = gr.Button("Proceed")
 
 
 
 
61
 
62
- return selection_page, country_choice, language_choice, proceed_btn, username, password, intro_markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  username = gr.Textbox(label="Email (optional)", type="email", elem_id="username_text")
58
  password = gr.Textbox(label="Password (optional)", type="password", elem_id="password_text")
59
 
60
+ with gr.Row():
61
+ login_btn = gr.Button("Login", elem_id="login_btn")
62
+ sign_up_btn = gr.Button("Sign up", elem_id="sign_up_btn")
63
+ reset_password_btn = gr.Button("Reset Password", elem_id="reset_password_btn")
64
+ logout_btn = gr.Button("Logout", elem_id="logout_btn",visible=False)
65
 
66
+ login_status = gr.Markdown("")
67
+ with gr.Row():
68
+ proceed_btn = gr.Button("Proceed")
69
+ with gr.Row():
70
+ change_password_field = gr.Textbox(
71
+ label="Change Password", type="password", elem_id="change_password_field", visible=True
72
+ )
73
+ change_password_field_confirm = gr.Textbox(
74
+ label="Confirm New Password", type="password", elem_id="change_password_field_confirm", visible=True
75
+ )
76
+ with gr.Row():
77
+ change_password_btn = gr.Button("Change Password", elem_id="change_password_btn", visible=True)
78
+ change_password_status = gr.Markdown("")
79
+
80
+ return selection_page, country_choice, language_choice, proceed_btn, username, password, intro_markdown, login_btn, sign_up_btn, reset_password_btn, login_status, logout_btn, change_password_field, change_password_field_confirm, change_password_btn, change_password_status
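
Note: submit_btn.click(submit_button_clicked, inputs=[vlm_output], outputs=[modal_vlm, modal_submit]) in the build_ui diff routes a submission either to the VLM offer or to the "already generated" warning, but submit_button_clicked itself is not shown in this commit. A minimal sketch of that routing, assuming Modal is the same component class used for the other modals here and that an empty vlm_output means no caption has been generated yet:

# hypothetical routing for submit_btn -- the Modal import path is an assumption, not confirmed by this diff
from gradio_modal import Modal


def submit_button_clicked(vlm_caption):
    if not vlm_caption:
        # No VLM description yet: open the modal offering to generate one before completing the submission.
        return Modal(visible=True), Modal(visible=False)
    # A caption already exists (it can only be generated once): ask the user to confirm re-submission.
    return Modal(visible=False), Modal(visible=True)

The buttons inside those modals (vlm_done_btn, vlm_no_btn, submit_yes) then run wire_submit_chain, which saves through hf_writer, clears the form, and refreshes the user's examples.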