File size: 16,248 Bytes
540a985
 
 
 
 
c61e1ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540a985
 
 
 
c61e1ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540a985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56c5ad3
540a985
 
 
 
 
 
 
56c5ad3
540a985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56c5ad3
540a985
 
 
 
 
 
 
56c5ad3
540a985
 
 
 
 
c61e1ad
540a985
c61e1ad
540a985
 
c61e1ad
540a985
c61e1ad
 
540a985
 
 
 
 
 
 
56c5ad3
c61e1ad
540a985
 
 
c61e1ad
 
 
 
 
 
 
 
 
 
 
 
 
540a985
 
c61e1ad
 
 
 
 
 
 
 
 
 
 
 
540a985
c61e1ad
540a985
 
 
c61e1ad
56c5ad3
540a985
 
 
 
 
 
 
56c5ad3
c61e1ad
540a985
 
c61e1ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540a985
 
 
 
 
 
 
 
 
56c5ad3
540a985
 
 
c61e1ad
540a985
 
56c5ad3
540a985
56c5ad3
540a985
56c5ad3
 
 
 
 
 
 
 
540a985
 
56c5ad3
540a985
 
 
 
 
 
 
 
 
56c5ad3
540a985
 
 
 
 
 
 
 
 
 
 
 
 
c61e1ad
 
540a985
 
 
 
 
c61e1ad
 
540a985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c61e1ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540a985
c61e1ad
 
540a985
 
 
 
 
c61e1ad
 
 
 
 
 
 
 
 
 
 
540a985
c61e1ad
540a985
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
import gradio as gr
import os
import random
import csv
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
from huggingface_hub import HfApi, hf_hub_download, login
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
from apscheduler.schedulers.background import BackgroundScheduler
import atexit
import threading
import time
import shutil

# --- Configuration ---
# Target dataset repo for collected preferences; override via env var.
DATASET_REPO_ID = os.getenv("DATASET_REPO_ID", "matsant01/user-study-collected-preferences")
# Hub token comes from a Space secret; when absent, results stay local only.
HF_TOKEN = os.getenv("HF_TOKEN")
RESULTS_FILENAME_IN_REPO = "preferences.csv"
# Results are staged in a throwaway temp dir and periodically pushed to the Hub.
TEMP_DIR = tempfile.mkdtemp()
LOCAL_RESULTS_FILE = Path(TEMP_DIR) / RESULTS_FILENAME_IN_REPO
# 0.1 hours = every 6 minutes.
UPLOAD_INTERVAL_HOURS = 0.1

# Samples live in subdirectories of ./data, one directory per sample id.
DATA_DIR = Path("data")
IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", ".webp"]

# --- Global State for Upload Logic ---
# hf_api stays None when login fails or no token is configured (disables uploads).
hf_api = None
scheduler = BackgroundScheduler(daemon=True)
# Serializes CSV writes (per-request threads) against the scheduler's upload job.
upload_lock = threading.Lock()
# Set after each recorded preference; cleared after a successful upload,
# so the periodic job skips uploads when nothing changed.
new_preferences_recorded_since_last_upload = threading.Event()

# --- Hugging Face Hub Login & Initialization ---
def initialize_hub_and_results():
    """Log in to the Hugging Face Hub and pull any existing results CSV.

    On success, ``hf_api`` is set and a previously uploaded results file (if
    any) is downloaded into ``TEMP_DIR`` so new rows append to it. On failure,
    ``hf_api`` is left/set to ``None`` and Hub syncing is disabled.
    """
    global hf_api

    # No token configured: run locally without Hub persistence.
    if not HF_TOKEN:
        print("Warning: HF_TOKEN secret not found. Results will not be saved to the Hub.")
        hf_api = None
        return

    print("Logging into Hugging Face Hub...")
    try:
        login(token=HF_TOKEN)
        hf_api = HfApi()
        print(f"Attempting initial download of {RESULTS_FILENAME_IN_REPO} from {DATASET_REPO_ID}")
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            filename=RESULTS_FILENAME_IN_REPO,
            local_dir=TEMP_DIR,
            local_dir_use_symlinks=False,
            token=HF_TOKEN,
        )
        print(f"Successfully downloaded existing {RESULTS_FILENAME_IN_REPO} to {LOCAL_RESULTS_FILE}")
    except EntryNotFoundError:
        # First run: no remote file yet, it will be created on first write.
        print(f"{RESULTS_FILENAME_IN_REPO} not found in repo. Will create locally.")
    except RepositoryNotFoundError:
        print(f"Error: Dataset repository {DATASET_REPO_ID} not found or token lacks permissions.")
        print("Results saving will be disabled.")
        hf_api = None
    except Exception as e:
        print(f"Error during initial download/login: {e}")
        print("Proceeding without initial download. File will be created locally.")

# --- Data Loading ---

def find_image(folder_path: Path, base_name: str) -> Path | None:
    """Return the first existing ``base_name.<ext>`` in *folder_path*.

    Extensions are tried in ``IMAGE_EXTENSIONS`` order; returns None when no
    candidate file exists.
    """
    candidates = (folder_path / f"{base_name}{ext}" for ext in IMAGE_EXTENSIONS)
    return next((candidate for candidate in candidates if candidate.exists()), None)

def get_sample_ids() -> list[str]:
    """Scan ``DATA_DIR`` and return the names of complete sample directories.

    A sample is complete when it contains prompt.txt plus an input_bg, input_fg,
    baseline and tf-icon image (any extension in IMAGE_EXTENSIONS).
    """
    if not DATA_DIR.is_dir():
        return []
    valid_ids: list[str] = []
    for folder in DATA_DIR.iterdir():
        if not folder.is_dir():
            continue
        required_images = (
            find_image(folder, "input_bg"),
            find_image(folder, "input_fg"),
            find_image(folder, "baseline"),
            find_image(folder, "tf-icon"),
        )
        if (folder / "prompt.txt").exists() and all(required_images):
            valid_ids.append(folder.name)
    return valid_ids

def load_sample_data(sample_id: str) -> dict | None:
    """Load one sample's prompt text and image file paths.

    Returns a dict with keys id / prompt / input_bg / input_fg /
    output_baseline / output_tficon (paths as strings), or None when the
    directory, any image, or the prompt is missing or unreadable.
    """
    sample_path = DATA_DIR / sample_id
    if not sample_path.is_dir():
        return None

    prompt_file = sample_path / "prompt.txt"
    image_paths = {
        "input_bg": find_image(sample_path, "input_bg"),
        "input_fg": find_image(sample_path, "input_fg"),
        "output_baseline": find_image(sample_path, "baseline"),
        "output_tficon": find_image(sample_path, "tf-icon"),
    }

    if not prompt_file.exists() or not all(image_paths.values()):
        print(f"Warning: Missing files in sample {sample_id}")
        return None

    try:
        prompt = prompt_file.read_text().strip()
    except Exception as e:
        print(f"Error reading prompt for {sample_id}: {e}")
        return None

    sample = {"id": sample_id, "prompt": prompt}
    sample.update({key: str(path) for key, path in image_paths.items()})
    return sample

# --- State and UI Logic ---

# Scanned once at import time; each session gets its own shrinking copy via gr.State.
INITIAL_SAMPLE_IDS = get_sample_ids()

def get_next_sample(available_ids: list[str]) -> tuple[dict | None, list[str]]:
    if not available_ids:
        return None, []
    chosen_id = random.choice(available_ids)
    remaining_ids = [id for id in available_ids if id != chosen_id]
    sample_data = load_sample_data(chosen_id)
    return sample_data, remaining_ids

def display_new_sample(state: dict, available_ids: list[str]):
    """Draw the next sample and return a component->update dict for Gradio.

    The dict keys are module-level component objects created in the
    ``gr.Blocks`` context below (resolved at call time). Output A/B order is
    shuffled per sample so the user cannot learn which side is which model;
    the A/B-to-model mapping is stashed in ``state`` for record_preference.
    """
    sample_data, remaining_ids = get_next_sample(available_ids)

    # Pool exhausted (or the chosen sample failed to load): end the study UI.
    if not sample_data:
        return {
            prompt_display: gr.update(value="**Prompt:** No more samples available. Thank you!"),
            input_bg_display: gr.update(value=None, visible=False),
            input_fg_display: gr.update(value=None, visible=False),
            output_a_display: gr.update(value=None, visible=False),
            output_b_display: gr.update(value=None, visible=False),
            choice_button_a: gr.update(visible=False),
            choice_button_b: gr.update(visible=False),
            next_button: gr.update(visible=False),
            status_display: gr.update(value="**Status:** Completed!"),
            app_state: state,
            available_samples_state: remaining_ids
        }

    # Randomize which model appears as Output A vs Output B.
    outputs = [
        {"model_name": "baseline", "path": sample_data["output_baseline"]},
        {"model_name": "tf-icon", "path": sample_data["output_tficon"]},
    ]
    random.shuffle(outputs)
    output_a = outputs[0]
    output_b = outputs[1]

    # Replace (not mutate) session state with the current sample's A/B mapping.
    state = {
        "current_sample_id": sample_data["id"],
        "output_a_model_name": output_a["model_name"],
        "output_b_model_name": output_b["model_name"],
    }

    return {
        prompt_display: gr.update(value=f"**Prompt:** {sample_data['prompt']}"),
        input_bg_display: gr.update(value=sample_data["input_bg"], visible=True),
        input_fg_display: gr.update(value=sample_data["input_fg"], visible=True),
        output_a_display: gr.update(value=output_a["path"], visible=True),
        output_b_display: gr.update(value=output_b["path"], visible=True),
        choice_button_a: gr.update(visible=True, interactive=True),
        choice_button_b: gr.update(visible=True, interactive=True),
        next_button: gr.update(visible=False),
        status_display: gr.update(value="**Status:** Please choose the image you prefer."),
        app_state: state,
        available_samples_state: remaining_ids
    }

def record_preference(choice: str, state: dict, request: gr.Request):
    """Append the user's A/B choice to the local CSV and update the UI.

    *choice* is "A" or "B". The written row records both the displayed
    position of each model and the resolved model name, so results survive
    the per-sample shuffle. Returns a component->update dict; all three exit
    paths disable the choice buttons and reveal the Next button.
    """
    # Session id is the client IP — a coarse identifier, not a real session key.
    if not request:
        print("Error: Request object is None. Cannot get session ID.")
        session_id = "unknown_session"
    else:
        try:
            session_id = request.client.host
        except AttributeError:
            print("Error: request.client is None or has no 'host' attribute.")
            session_id = "unknown_client"

    # State can be lost (e.g. server restart mid-session); fail soft.
    if not state or "current_sample_id" not in state:
        print("Warning: State missing, cannot record preference.")
        return {
            choice_button_a: gr.update(interactive=False),
            choice_button_b: gr.update(interactive=False),
            next_button: gr.update(visible=True, interactive=True),
            status_display: gr.update(value="**Status:** Error: Session state lost. Click Next Sample."),
            app_state: state
        }

    # Resolve the displayed letters back to model names using the stored mapping.
    chosen_model_name = state["output_a_model_name"] if choice == "A" else state["output_b_model_name"]
    baseline_display = "A" if state["output_a_model_name"] == "baseline" else "B"
    tficon_display = "B" if state["output_a_model_name"] == "baseline" else "A"

    # NOTE(review): timestamp is naive local time — confirm whether UTC is wanted.
    new_row = {
        "timestamp": datetime.now().isoformat(),
        "session_id": session_id,
        "sample_id": state["current_sample_id"],
        "baseline_displayed_as": baseline_display,
        "tficon_displayed_as": tficon_display,
        "chosen_display": choice,
        "chosen_model_name": chosen_model_name
    }
    header = list(new_row.keys())

    try:
        # Lock against the background upload job reading the file mid-write.
        with upload_lock:
            file_exists = LOCAL_RESULTS_FILE.exists()
            mode = 'a' if file_exists else 'w'
            with open(LOCAL_RESULTS_FILE, mode, newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=header)
                # Write the header on first creation, or if the file is empty.
                if not file_exists or os.path.getsize(LOCAL_RESULTS_FILE) == 0:
                    writer.writeheader()
                    print(f"Created or wrote header to {LOCAL_RESULTS_FILE}")
                writer.writerow(new_row)
            print(f"Appended preference for {state['current_sample_id']} to local file.")
            # Signal the periodic uploader that there is fresh data.
            new_preferences_recorded_since_last_upload.set()

    except Exception as e:
        print(f"Error writing local results file {LOCAL_RESULTS_FILE}: {e}")
        return {
            choice_button_a: gr.update(interactive=False),
            choice_button_b: gr.update(interactive=False),
            next_button: gr.update(visible=True, interactive=True),
            status_display: gr.update(value=f"**Status:** Error saving preference locally: {e}. Click Next."),
            app_state: state
        }

    return {
        choice_button_a: gr.update(interactive=False),
        choice_button_b: gr.update(interactive=False),
        next_button: gr.update(visible=True, interactive=True),
        status_display: gr.update(value=f"**Status:** Preference recorded (Chose {choice}). Click Next Sample."),
        app_state: state
    }

def upload_preferences_to_hub():
    """Upload the local results CSV to the dataset repo if new rows exist.

    Runs on the background scheduler and once more from the shutdown hook.
    The ``new_preferences_recorded_since_last_upload`` event makes uploads
    conditional on fresh data; it is only cleared after a successful upload
    so a failed attempt is retried on the next tick.
    """
    print("Periodic upload check triggered.")
    if not hf_api:
        print("Upload check skipped: Hugging Face API not available.")
        return

    # Cheap pre-check outside the lock to avoid contending with CSV writers.
    if not new_preferences_recorded_since_last_upload.is_set():
        print("Upload check skipped: No new preferences recorded since last upload.")
        return

    with upload_lock:
        # Re-check under the lock (double-checked pattern): another upload may
        # have cleared the event between the pre-check and acquiring the lock.
        if not new_preferences_recorded_since_last_upload.is_set():
            print("Upload check skipped (race condition avoided): No new preferences.")
            return

        if not LOCAL_RESULTS_FILE.exists() or os.path.getsize(LOCAL_RESULTS_FILE) == 0:
            print("Upload check skipped: Local results file is missing or empty.")
            new_preferences_recorded_since_last_upload.clear()
            return

        try:
            print(f"Attempting to upload {LOCAL_RESULTS_FILE} to {DATASET_REPO_ID}/{RESULTS_FILENAME_IN_REPO}")
            start_time = time.time()
            hf_api.upload_file(
                path_or_fileobj=str(LOCAL_RESULTS_FILE),
                path_in_repo=RESULTS_FILENAME_IN_REPO,
                repo_id=DATASET_REPO_ID,
                repo_type="dataset",
                commit_message=f"Periodic upload of preferences - {datetime.now().isoformat()}"
            )
            end_time = time.time()
            print(f"Successfully uploaded preferences. Took {end_time - start_time:.2f} seconds.")
            new_preferences_recorded_since_last_upload.clear()
        except Exception as e:
            # Leave the event set so the next scheduled run retries.
            print(f"Error uploading results file: {e}")

def handle_choice_a(state: dict, request: gr.Request):
    """Gradio click callback: the user preferred Output A."""
    return record_preference("A", state, request)

def handle_choice_b(state: dict, request: gr.Request):
    """Gradio click callback: the user preferred Output B."""
    return record_preference("B", state, request)

# --- UI layout and event wiring (components are module-level so the handler
# functions above can key their update dicts by component object) ---
with gr.Blocks(title="Image Composition User Study") as demo:
    gr.Markdown("# Image Composition User Study")
    gr.Markdown(
        "> Please look at the input images and the prompt below. "
        "Then, compare the two output images (Output A and Output B) and click the button below the one you prefer."
    )

    # Per-session state: A/B-to-model mapping and the shrinking sample pool.
    app_state = gr.State({})
    available_samples_state = gr.State(INITIAL_SAMPLE_IDS)

    status_display = gr.Markdown("**Status:** Loading first sample...")

    gr.Markdown("## Inputs")
    with gr.Row():
        prompt_display = gr.Markdown("**Prompt:** Loading...")
    with gr.Row():
        with gr.Column():
            gr.Markdown("<div style='text-align: center;'>Input Background</div>")
            input_bg_display = gr.Image(type="filepath", height=250, width=250, interactive=False, show_label=False)
        with gr.Column():
            gr.Markdown("<div style='text-align: center;'>Input Foreground</div>")
            input_fg_display = gr.Image(type="filepath", height=250, width=250, interactive=False, show_label=False)

    gr.Markdown("---")
    gr.Markdown("## Choose your preferred output")

    with gr.Row():
        with gr.Column():
            output_a_display = gr.Image(label="Output A", type="filepath", height=400, width=400, interactive=False)
            choice_button_a = gr.Button("Choose Output A", variant="primary")
        with gr.Column():
            output_b_display = gr.Image(label="Output B", type="filepath", height=400, width=400, interactive=False)
            choice_button_b = gr.Button("Choose Output B", variant="primary")

    next_button = gr.Button("🔁 Next Sample 🔁", visible=False)

    # Show the first sample as soon as a browser session connects.
    demo.load(
        fn=display_new_sample,
        inputs=[app_state, available_samples_state],
        outputs=[
            prompt_display, input_bg_display, input_fg_display,
            output_a_display, output_b_display,
            choice_button_a, choice_button_b, next_button, status_display,
            app_state, available_samples_state
        ]
    )

    # gr.Request is injected automatically by Gradio; only state is an input.
    choice_button_a.click(
        fn=handle_choice_a,
        inputs=[app_state],
        outputs=[choice_button_a, choice_button_b, next_button, status_display, app_state],
        api_name=False,
    )

    choice_button_b.click(
        fn=handle_choice_b,
        inputs=[app_state],
        outputs=[choice_button_a, choice_button_b, next_button, status_display, app_state],
        api_name=False,
    )

    next_button.click(
        fn=display_new_sample,
        inputs=[app_state, available_samples_state],
        outputs=[
            prompt_display, input_bg_display, input_fg_display,
            output_a_display, output_b_display,
            choice_button_a, choice_button_b, next_button, status_display,
            app_state, available_samples_state
        ],
        api_name=False,
    )

def cleanup_temp_dir():
    """Delete the temporary results directory, ignoring errors."""
    if not Path(TEMP_DIR).exists():
        return
    print(f"Cleaning up temporary directory: {TEMP_DIR}")
    shutil.rmtree(TEMP_DIR, ignore_errors=True)

def shutdown_hook():
    """Flush pending results, stop the scheduler, then remove the temp dir.

    Order matters: the final upload must happen before cleanup_temp_dir
    deletes the local CSV.
    """
    print("Application shutting down. Performing final upload check...")
    upload_preferences_to_hub()
    if scheduler.running:
        print("Shutting down scheduler...")
        scheduler.shutdown(wait=False)
    cleanup_temp_dir()
    print("Shutdown complete.")

# Registered unconditionally so a local (no-Hub) run still cleans up its temp dir.
atexit.register(shutdown_hook)

if __name__ == "__main__":
    # Attempt Hub login and download of any previously collected results.
    initialize_hub_and_results()

    # Refuse to start without data; explain the expected directory layout.
    if not INITIAL_SAMPLE_IDS:
        print("Error: No valid samples found in the 'data' directory.")
        print("Please ensure the 'data' directory exists and contains subdirectories")
        print("named like 'sample_id', each with 'prompt.txt', 'input_bg.*',")
        print("'input_fg.*', 'baseline.*', and 'tf-icon.*' files.")
    elif not DATASET_REPO_ID:
        print("Error: DATASET_REPO_ID environment variable is not set or is set to the default placeholder.")
        print("Please set the DATASET_REPO_ID environment variable or update the script.")
    elif hf_api:
        # Full mode: periodic background uploads plus the Gradio UI.
        print(f"Starting periodic upload scheduler (every {UPLOAD_INTERVAL_HOURS} hours)...")
        scheduler.add_job(upload_preferences_to_hub, 'interval', hours=UPLOAD_INTERVAL_HOURS)
        scheduler.start()
        print(f"Found {len(INITIAL_SAMPLE_IDS)} samples.")
        print(f"Configured to save results periodically to Hugging Face Dataset: {DATASET_REPO_ID}")
        print("Starting Gradio app...")
        demo.launch(server_name="0.0.0.0")
    else:
        # Degraded mode: UI runs but preferences are only written locally.
        print("Warning: Running without Hugging Face Hub integration (HF_TOKEN or DATASET_REPO_ID missing/invalid).")
        print(f"Found {len(INITIAL_SAMPLE_IDS)} samples.")
        print("Starting Gradio app...")
        demo.launch(server_name="0.0.0.0")