"""Gradio user-study app: shows pairs of image-composition outputs (baseline vs.
tf-icon) and records participants' pairwise preferences, periodically syncing
the collected CSV to a Hugging Face dataset repository."""
import gradio as gr
import os
import random
import csv
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
from huggingface_hub import HfApi, hf_hub_download, login
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
from apscheduler.schedulers.background import BackgroundScheduler
import atexit
import threading
import time
import shutil
# --- Configuration ---
DATASET_REPO_ID = os.getenv("DATASET_REPO_ID", "matsant01/user-study-collected-preferences")  # HF dataset repo that stores the collected preferences
HF_TOKEN = os.getenv("HF_TOKEN")  # write token for the dataset repo; uploads are disabled when absent
RESULTS_FILENAME_IN_REPO = "preferences.csv"  # name of the CSV both locally and in the repo
TEMP_DIR = tempfile.mkdtemp()  # scratch directory holding the local copy of the results CSV
LOCAL_RESULTS_FILE = Path(TEMP_DIR) / RESULTS_FILENAME_IN_REPO
UPLOAD_INTERVAL_HOURS = 0.1  # scheduler period (0.1 h = every 6 minutes)
DATA_DIR = Path("data")  # each subdirectory is one study sample
IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", ".webp"]  # accepted image extensions, tried in this order
# --- Global State for Upload Logic ---
hf_api = None  # HfApi client once login succeeds; None means Hub uploads are disabled
scheduler = BackgroundScheduler(daemon=True)  # runs the periodic upload job
upload_lock = threading.Lock()  # serializes CSV writes against uploads
new_preferences_recorded_since_last_upload = threading.Event()  # dirty flag: set on each new row, cleared after a successful upload
# --- Hugging Face Hub Login & Initialization ---
def initialize_hub_and_results():
    """Log into the Hugging Face Hub and pull any existing results CSV.

    Sets the module-level ``hf_api`` handle. It is left as ``None`` whenever
    the token is missing or the dataset repo is unreachable, which disables
    all subsequent uploads.
    """
    global hf_api
    if not HF_TOKEN:
        print("Warning: HF_TOKEN secret not found. Results will not be saved to the Hub.")
        hf_api = None
        return
    print("Logging into Hugging Face Hub...")
    try:
        login(token=HF_TOKEN)
        hf_api = HfApi()
        print(f"Attempting initial download of {RESULTS_FILENAME_IN_REPO} from {DATASET_REPO_ID}")
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=RESULTS_FILENAME_IN_REPO,
            repo_type="dataset",
            token=HF_TOKEN,
            local_dir=TEMP_DIR,
            local_dir_use_symlinks=False
        )
        print(f"Successfully downloaded existing {RESULTS_FILENAME_IN_REPO} to {LOCAL_RESULTS_FILE}")
    except EntryNotFoundError:
        # No results in the repo yet: the CSV will be created on the first vote.
        print(f"{RESULTS_FILENAME_IN_REPO} not found in repo. Will create locally.")
    except RepositoryNotFoundError:
        print(f"Error: Dataset repository {DATASET_REPO_ID} not found or token lacks permissions.")
        print("Results saving will be disabled.")
        hf_api = None
    except Exception as e:
        # Best-effort: keep running with a local-only CSV.
        print(f"Error during initial download/login: {e}")
        print("Proceeding without initial download. File will be created locally.")
# --- Data Loading ---
def find_image(folder_path: Path, base_name: str) -> Path | None:
for ext in IMAGE_EXTENSIONS:
file_path = folder_path / f"{base_name}{ext}"
if file_path.exists():
return file_path
return None
def get_sample_ids() -> list[str]:
    """Scan DATA_DIR and return the names of subdirectories forming complete samples.

    A sample is complete when it contains ``prompt.txt`` plus all four images:
    ``input_bg``, ``input_fg``, ``baseline`` and ``tf-icon`` (any accepted
    extension). Directory iteration order is preserved.
    """
    if not DATA_DIR.is_dir():
        return []
    complete: list[str] = []
    for candidate in DATA_DIR.iterdir():
        if not candidate.is_dir():
            continue
        has_prompt = (candidate / "prompt.txt").exists()
        images = [
            find_image(candidate, "input_bg"),
            find_image(candidate, "input_fg"),
            find_image(candidate, "baseline"),
            find_image(candidate, "tf-icon"),
        ]
        if has_prompt and all(images):
            complete.append(candidate.name)
    return complete
def load_sample_data(sample_id: str) -> dict | None:
    """Load the prompt text and image paths for one sample directory.

    Returns ``None`` when the directory, any required image, or the prompt
    file is missing, or when the prompt cannot be read.
    """
    sample_path = DATA_DIR / sample_id
    if not sample_path.is_dir():
        return None
    prompt_file = sample_path / "prompt.txt"
    # Resolve the four required images; key order matches the returned dict.
    images = {
        "input_bg": find_image(sample_path, "input_bg"),
        "input_fg": find_image(sample_path, "input_fg"),
        "output_baseline": find_image(sample_path, "baseline"),
        "output_tficon": find_image(sample_path, "tf-icon"),
    }
    if not prompt_file.exists() or not all(images.values()):
        print(f"Warning: Missing files in sample {sample_id}")
        return None
    try:
        prompt = prompt_file.read_text().strip()
    except Exception as e:
        print(f"Error reading prompt for {sample_id}: {e}")
        return None
    result = {"id": sample_id, "prompt": prompt}
    result.update({key: str(path) for key, path in images.items()})
    return result
# --- State and UI Logic ---
# Snapshot of valid sample directories taken once at import time; each browser
# session starts from this full list and removes samples as they are shown.
INITIAL_SAMPLE_IDS = get_sample_ids()
def get_next_sample(available_ids: list[str]) -> tuple[dict | None, list[str]]:
if not available_ids:
return None, []
chosen_id = random.choice(available_ids)
remaining_ids = [id for id in available_ids if id != chosen_id]
sample_data = load_sample_data(chosen_id)
return sample_data, remaining_ids
def display_new_sample(state: dict, available_ids: list[str]):
    """Draw the next sample, randomize which model is shown as A/B, build UI updates.

    Returns a dict keyed by Gradio components. When the pool is exhausted the
    study widgets are hidden and a completion message is shown instead.
    """
    sample_data, remaining_ids = get_next_sample(available_ids)
    if sample_data is None:
        # Pool exhausted: hide everything and thank the participant.
        return {
            prompt_display: gr.update(value="**Prompt:** No more samples available. Thank you!"),
            input_bg_display: gr.update(value=None, visible=False),
            input_fg_display: gr.update(value=None, visible=False),
            output_a_display: gr.update(value=None, visible=False),
            output_b_display: gr.update(value=None, visible=False),
            choice_button_a: gr.update(visible=False),
            choice_button_b: gr.update(visible=False),
            next_button: gr.update(visible=False),
            status_display: gr.update(value="**Status:** Completed!"),
            app_state: state,
            available_samples_state: remaining_ids,
        }
    candidates = [
        {"model_name": "baseline", "path": sample_data["output_baseline"]},
        {"model_name": "tf-icon", "path": sample_data["output_tficon"]},
    ]
    random.shuffle(candidates)  # blind the participant to which model is A vs B
    slot_a, slot_b = candidates
    fresh_state = {
        "current_sample_id": sample_data["id"],
        "output_a_model_name": slot_a["model_name"],
        "output_b_model_name": slot_b["model_name"],
    }
    return {
        prompt_display: gr.update(value=f"**Prompt:** {sample_data['prompt']}"),
        input_bg_display: gr.update(value=sample_data["input_bg"], visible=True),
        input_fg_display: gr.update(value=sample_data["input_fg"], visible=True),
        output_a_display: gr.update(value=slot_a["path"], visible=True),
        output_b_display: gr.update(value=slot_b["path"], visible=True),
        choice_button_a: gr.update(visible=True, interactive=True),
        choice_button_b: gr.update(visible=True, interactive=True),
        next_button: gr.update(visible=False),
        status_display: gr.update(value="**Status:** Please choose the image you prefer."),
        app_state: fresh_state,
        available_samples_state: remaining_ids,
    }
def record_preference(choice: str, state: dict, request: gr.Request):
    """Persist one A/B vote to the local CSV and disable the choice buttons.

    Args:
        choice: ``"A"`` or ``"B"``, the display slot the participant picked.
        state: Per-session dict produced by ``display_new_sample``.
        request: Gradio request; its client host is recorded as a session id.

    Returns:
        A dict of Gradio component updates (buttons disabled, status text,
        Next button revealed).
    """
    if request:
        try:
            session_id = request.client.host
        except AttributeError:
            print("Error: request.client is None or has no 'host' attribute.")
            session_id = "unknown_client"
    else:
        print("Error: Request object is None. Cannot get session ID.")
        session_id = "unknown_session"
    if not state or "current_sample_id" not in state:
        # Session state was lost (e.g. page reload mid-study); skip recording.
        print("Warning: State missing, cannot record preference.")
        return {
            choice_button_a: gr.update(interactive=False),
            choice_button_b: gr.update(interactive=False),
            next_button: gr.update(visible=True, interactive=True),
            status_display: gr.update(value="**Status:** Error: Session state lost. Click Next Sample."),
            app_state: state,
        }
    a_model = state["output_a_model_name"]
    chosen_model_name = a_model if choice == "A" else state["output_b_model_name"]
    # De-anonymize the slots so the CSV records which slot held which model.
    baseline_display = "A" if a_model == "baseline" else "B"
    tficon_display = "B" if a_model == "baseline" else "A"
    new_row = {
        "timestamp": datetime.now().isoformat(),
        "session_id": session_id,
        "sample_id": state["current_sample_id"],
        "baseline_displayed_as": baseline_display,
        "tficon_displayed_as": tficon_display,
        "chosen_display": choice,
        "chosen_model_name": chosen_model_name,
    }
    try:
        with upload_lock:  # do not write while the uploader is reading the file
            already_there = LOCAL_RESULTS_FILE.exists()
            with open(LOCAL_RESULTS_FILE, 'a' if already_there else 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=list(new_row))
                if not already_there or os.path.getsize(LOCAL_RESULTS_FILE) == 0:
                    writer.writeheader()
                    print(f"Created or wrote header to {LOCAL_RESULTS_FILE}")
                writer.writerow(new_row)
            print(f"Appended preference for {state['current_sample_id']} to local file.")
            new_preferences_recorded_since_last_upload.set()
    except Exception as e:
        print(f"Error writing local results file {LOCAL_RESULTS_FILE}: {e}")
        return {
            choice_button_a: gr.update(interactive=False),
            choice_button_b: gr.update(interactive=False),
            next_button: gr.update(visible=True, interactive=True),
            status_display: gr.update(value=f"**Status:** Error saving preference locally: {e}. Click Next."),
            app_state: state,
        }
    return {
        choice_button_a: gr.update(interactive=False),
        choice_button_b: gr.update(interactive=False),
        next_button: gr.update(visible=True, interactive=True),
        status_display: gr.update(value=f"**Status:** Preference recorded (Chose {choice}). Click Next Sample."),
        app_state: state,
    }
def upload_preferences_to_hub():
    """Push the local preferences CSV to the dataset repo if there is new data.

    Bails out quickly when the Hub client is unavailable or nothing changed;
    the dirty flag is re-checked under the lock to avoid racing with writers.
    On upload failure the flag stays set so the next cycle retries.
    """
    print("Periodic upload check triggered.")
    if not hf_api:
        print("Upload check skipped: Hugging Face API not available.")
        return
    if not new_preferences_recorded_since_last_upload.is_set():
        print("Upload check skipped: No new preferences recorded since last upload.")
        return
    with upload_lock:
        # Re-test under the lock: a concurrent cycle may have already uploaded.
        if not new_preferences_recorded_since_last_upload.is_set():
            print("Upload check skipped (race condition avoided): No new preferences.")
            return
        if not LOCAL_RESULTS_FILE.exists() or os.path.getsize(LOCAL_RESULTS_FILE) == 0:
            print("Upload check skipped: Local results file is missing or empty.")
            new_preferences_recorded_since_last_upload.clear()
            return
        try:
            print(f"Attempting to upload {LOCAL_RESULTS_FILE} to {DATASET_REPO_ID}/{RESULTS_FILENAME_IN_REPO}")
            started = time.time()
            hf_api.upload_file(
                path_or_fileobj=str(LOCAL_RESULTS_FILE),
                path_in_repo=RESULTS_FILENAME_IN_REPO,
                repo_id=DATASET_REPO_ID,
                repo_type="dataset",
                commit_message=f"Periodic upload of preferences - {datetime.now().isoformat()}",
            )
            elapsed = time.time() - started
            print(f"Successfully uploaded preferences. Took {elapsed:.2f} seconds.")
            new_preferences_recorded_since_last_upload.clear()
        except Exception as e:
            print(f"Error uploading results file: {e}")
def handle_choice_a(state: dict, request: gr.Request):
    """Click handler for the "Choose Output A" button; delegates to record_preference."""
    return record_preference("A", state, request)
def handle_choice_b(state: dict, request: gr.Request):
    """Click handler for the "Choose Output B" button; delegates to record_preference."""
    return record_preference("B", state, request)
with gr.Blocks(title="Image Composition User Study") as demo:
    # --- UI layout: instructions, shared state, inputs, and the A/B outputs ---
    gr.Markdown("# Image Composition User Study")
    gr.Markdown(
        "> Please look at the input images and the prompt below. "
        "Then, compare the two output images (Output A and Output B) and click the button below the one you prefer."
    )
    # Per-session state: current sample id and which model occupies slot A/B.
    app_state = gr.State({})
    # Per-session pool of sample ids not yet shown to this participant.
    available_samples_state = gr.State(INITIAL_SAMPLE_IDS)
    status_display = gr.Markdown("**Status:** Loading first sample...")
    gr.Markdown("## Inputs")
    with gr.Row():
        prompt_display = gr.Markdown("**Prompt:** Loading...")
    with gr.Row():
        with gr.Column():
            gr.Markdown("<div style='text-align: center;'>Input Background</div>")
            input_bg_display = gr.Image(type="filepath", height=250, width=250, interactive=False, show_label=False)
        with gr.Column():
            gr.Markdown("<div style='text-align: center;'>Input Foreground</div>")
            input_fg_display = gr.Image(type="filepath", height=250, width=250, interactive=False, show_label=False)
    gr.Markdown("---")
    gr.Markdown("## Choose your preferred output")
    with gr.Row():
        with gr.Column():
            output_a_display = gr.Image(label="Output A", type="filepath", height=400, width=400, interactive=False)
            choice_button_a = gr.Button("Choose Output A", variant="primary")
        with gr.Column():
            output_b_display = gr.Image(label="Output B", type="filepath", height=400, width=400, interactive=False)
            choice_button_b = gr.Button("Choose Output B", variant="primary")
    next_button = gr.Button("🔁 Next Sample 🔁", visible=False)
    # Show the first sample as soon as a browser session connects.
    demo.load(
        fn=display_new_sample,
        inputs=[app_state, available_samples_state],
        outputs=[
            prompt_display, input_bg_display, input_fg_display,
            output_a_display, output_b_display,
            choice_button_a, choice_button_b, next_button, status_display,
            app_state, available_samples_state
        ]
    )
    # Voting buttons record the choice; the gr.Request argument of the handler
    # is injected automatically by Gradio and is not listed in `inputs`.
    choice_button_a.click(
        fn=handle_choice_a,
        inputs=[app_state],
        outputs=[choice_button_a, choice_button_b, next_button, status_display, app_state],
        api_name=False,
    )
    choice_button_b.click(
        fn=handle_choice_b,
        inputs=[app_state],
        outputs=[choice_button_a, choice_button_b, next_button, status_display, app_state],
        api_name=False,
    )
    # Advance to a fresh sample after a vote (or after a state-loss error).
    next_button.click(
        fn=display_new_sample,
        inputs=[app_state, available_samples_state],
        outputs=[
            prompt_display, input_bg_display, input_fg_display,
            output_a_display, output_b_display,
            choice_button_a, choice_button_b, next_button, status_display,
            app_state, available_samples_state
        ],
        api_name=False,
    )
def cleanup_temp_dir():
if Path(TEMP_DIR).exists():
print(f"Cleaning up temporary directory: {TEMP_DIR}")
shutil.rmtree(TEMP_DIR, ignore_errors=True)
def shutdown_hook():
    """Final cleanup registered with atexit: flush pending results, stop the scheduler, delete temp files."""
    print("Application shutting down. Performing final upload check...")
    # Best-effort final sync so the last recorded votes are not lost.
    upload_preferences_to_hub()
    if scheduler.running:
        print("Shutting down scheduler...")
        scheduler.shutdown(wait=False)
    cleanup_temp_dir()
    print("Shutdown complete.")
atexit.register(shutdown_hook)
if __name__ == "__main__":
    # Entry point: set up Hub sync, validate configuration, then serve the UI.
    initialize_hub_and_results()
    if not INITIAL_SAMPLE_IDS:
        # Nothing to study: explain the expected data layout and exit without serving.
        print("Error: No valid samples found in the 'data' directory.")
        print("Please ensure the 'data' directory exists and contains subdirectories")
        print("named like 'sample_id', each with 'prompt.txt', 'input_bg.*',")
        print("'input_fg.*', 'baseline.*', and 'tf-icon.*' files.")
    elif not DATASET_REPO_ID:
        print("Error: DATASET_REPO_ID environment variable is not set or is set to the default placeholder.")
        print("Please set the DATASET_REPO_ID environment variable or update the script.")
    else:
        if hf_api:
            print(f"Starting periodic upload scheduler (every {UPLOAD_INTERVAL_HOURS} hours)...")
            scheduler.add_job(upload_preferences_to_hub, 'interval', hours=UPLOAD_INTERVAL_HOURS)
            scheduler.start()
            print(f"Found {len(INITIAL_SAMPLE_IDS)} samples.")
            print(f"Configured to save results periodically to Hugging Face Dataset: {DATASET_REPO_ID}")
        else:
            print("Warning: Running without Hugging Face Hub integration (HF_TOKEN or DATASET_REPO_ID missing/invalid).")
            print(f"Found {len(INITIAL_SAMPLE_IDS)} samples.")
        print("Starting Gradio app...")
        demo.launch(server_name="0.0.0.0")