File size: 5,676 Bytes
d9cbdf1
138d0d5
 
d9cbdf1
8823c0c
138d0d5
 
57bf5d5
8823c0c
138d0d5
 
fadec32
 
 
138d0d5
 
 
 
 
 
 
 
 
 
ff62d04
8823c0c
138d0d5
 
d88bb9d
138d0d5
 
ccaeded
138d0d5
8823c0c
138d0d5
 
 
 
8823c0c
138d0d5
 
8823c0c
138d0d5
 
 
 
 
 
 
fadec32
 
 
 
 
 
138d0d5
 
 
 
fadec32
138d0d5
 
8823c0c
138d0d5
 
 
 
8823c0c
 
138d0d5
 
 
8823c0c
138d0d5
 
 
d88bb9d
 
138d0d5
 
 
 
8823c0c
31b5d12
138d0d5
31b5d12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8823c0c
 
ccaeded
138d0d5
 
 
 
 
 
 
 
 
8823c0c
138d0d5
 
8823c0c
60f83a0
19ec3d7
ccaeded
19ec3d7
8823c0c
60f83a0
 
8823c0c
 
 
ccaeded
60f83a0
ccaeded
19ec3d7
8823c0c
138d0d5
 
8823c0c
 
 
 
 
 
 
 
 
 
 
31b5d12
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import streamlit as st
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download

# Constants
HF_REPO = "giobin/MAIA_human_assessment_annotations"
CSV_FILENAME = "user_selections.csv"

# Function to assign samples to users
def assign_samples(csv_path, samples_per_group=5):
    """Assign a fixed slice of the sample pool to each annotator.

    Reads the samples CSV and builds three groups, one per ``pool_pos``
    value (1, 2 and 3). Each group holds the first ``samples_per_group``
    rows of its pool whose ``question_category`` does not end with ``"_B"``.

    Args:
        csv_path: Path of the samples CSV (anything ``pandas.read_csv``
            accepts). It must contain ``pool_pos`` and
            ``question_category`` columns.
        samples_per_group: Maximum number of rows kept per group.

    Returns:
        Dict mapping annotator name to the DataFrame assigned to them.
        Annotators in the same group share the same DataFrame object.
    """
    df = pd.read_csv(csv_path)

    # na=False keeps the mask strictly boolean: a row with a missing category
    # counts as "not a _B question" instead of making the `~` below raise.
    is_b_category = df["question_category"].str.endswith("_B", na=False)

    def select_pool(position):
        """Return the first rows of one pool, excluding ``_B`` categories."""
        in_pool = df["pool_pos"] == position
        return df[in_pool & ~is_b_category].head(samples_per_group)

    group_1 = select_pool(1)
    group_2 = select_pool(2)
    group_3 = select_pool(3)
    return {
        "Bernardo": group_1,
        "Alessandro": group_1,
        "Alessio": group_1,
        "Lenci": group_2,
        "Lucia": group_2,
        "Davide": group_2,
        "Giovanni": group_3,
        "Raffaella": group_3,
    }

# Function to load existing annotations from Hugging Face Hub
def load_existing_annotations():
    """Download the shared annotations CSV from the Hugging Face Hub.

    Returns:
        DataFrame read from ``CSV_FILENAME`` in the ``HF_REPO`` dataset repo.
        If the download or the parsing fails for any reason, an empty
        DataFrame with ``username`` and ``id`` columns is returned instead.
    """
    try:
        file_path = hf_hub_download(HF_REPO, CSV_FILENAME, repo_type="dataset", token=st.secrets["HF_TOKEN"])
        return pd.read_csv(file_path)
    except Exception as e:
        # Best-effort load: the app keeps working with an empty frame, but the
        # reason is logged so a token or network failure is not mistaken for
        # "no annotations yet" (presumably the expected first-run case).
        print(f"Error loading annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])

# Load datasets
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# Valid users: every annotator that has an assignment
valid_users = [*assignments]

# Seed session state; keys already set by an earlier run are left untouched
for _state_key, _initial_value in (("username", None), ("index", 0), ("results", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# User selection
def update_name():
    """Store the name picked in the select box and restart progress at 0."""
    state = st.session_state
    state.username = state.selected_user
    state.index = 0  # Start again from the first sample

if st.session_state.username is None:
    # No annotator chosen yet: render only the name picker and halt this run.
    # The "Start" callback stores the choice in session state.
    with st.form("user_form"):
        st.write("### Select Your Name")
        st.selectbox("Choose your name:", valid_users, key="selected_user")
        st.form_submit_button("Start", on_click=update_name)
    st.stop()

# Retrieve assigned dataset and filter out already labeled samples
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
user_labeled_ids = existing_annotations[existing_annotations["username"] == st.session_state.username]["id"].tolist()
dataset = full_dataset[~full_dataset["id"].isin(user_labeled_ids)].reset_index(drop=True)

# Check if all samples are labeled
if st.session_state.index >= len(dataset):
    # The emoji was previously stored as mis-decoded bytes ("πŸŽ‰").
    st.write("### Great! You have completed your assignment. 🎉")
    st.stop()

# Function to push updated annotations to Hugging Face Hub
def push_to_hf_hub(csv_path):
    """Upload the CSV at ``csv_path`` to the ``HF_REPO`` dataset repo.

    The repo is created first if it does not exist, then the file is stored
    under ``CSV_FILENAME``. Any failure is printed rather than raised.
    """
    api = HfApi()
    try:
        # The secret is read inside the try so a missing token is reported
        # through the same error message as any other failure.
        hub_target = {"repo_type": "dataset", "token": st.secrets["HF_TOKEN"]}
        api.create_repo(HF_REPO, exist_ok=True, **hub_target)
        api.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            **hub_target,
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")

# Function to save user choice
def save_choice():
    """Record the answer for the current sample and upload when finished.

    Used as the ``on_click`` callback of the annotation form. It reads the
    ``selected_answer`` and ``not_enough_info`` widget values from session
    state, appends one result row for the current sample and advances the
    index. Once the index reaches the end of ``dataset``, the session's
    results are merged with the previously loaded annotations, written to a
    local CSV and pushed to the Hugging Face Hub.
    """
    sample = dataset.iloc[st.session_state.index]
    selected_answer = st.session_state.get("selected_answer")
    not_enough_info = st.session_state.get("not_enough_info", False)

    if selected_answer is not None:
        st.session_state.results.append({
            "username": st.session_state.username,
            "id": sample["id"],
            "video_id": sample["video_id"],
            "answer1": sample["answer1"],
            "answer2": sample["answer2"],
            "selected_answer": selected_answer,
            "target": sample["target"],
            "not_enough_info": not_enough_info,
        })
        st.session_state.index += 1

    # Results are only saved and pushed after the last sample is labeled.
    if st.session_state.index < len(dataset):
        return

    # The emoji was previously stored as mis-decoded bytes ("πŸŽ‰").
    st.write("### Great! You have completed your assignment. 🎉")
    result_df = pd.DataFrame(st.session_state.results)
    if not existing_annotations.empty:
        # keep="last" lets this session's rows win over older rows with the
        # same (username, id) pair.
        result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(subset=["username", "id"], keep="last")

    # Same file name locally and on the Hub, taken from the shared constant.
    csv_path = CSV_FILENAME
    result_df.to_csv(csv_path, index=False)
    push_to_hf_hub(csv_path)
    st.stop()

# Display current sample
sample = dataset.iloc[st.session_state.index]

# Page title and user information
title_html = "<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>"
user_html = f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>"
st.markdown(title_html, unsafe_allow_html=True)
st.markdown(user_html, unsafe_allow_html=True)
st.write("\n\n")

# Instructions
st.markdown("""
### Instructions:
- Look at the video thumbnail; do not play it.
- Select the correct description (A or B).
- If the frame does not provide enough information to answer the question, select the checkbox.
- Click 'Next' to proceed.
""")
st.write("---")

# Display the sample's video
st.video(sample["video_url"])


def _format_option(option):
    """Label option 0 with the sample's first answer, anything else with the second."""
    if option == 0:
        return f"A: {sample['answer1']}"
    return f"B: {sample['answer2']}"


# Form for user input; widget values reach the callback through session state
with st.form("annotation_form"):
    # Exclusive choice between A (0) and B (1)
    selected_answer = st.radio(
        "Choose the correct description:",
        options=[0, 1],
        format_func=_format_option,
        key="selected_answer",
    )

    # Independent checkbox for insufficient information
    not_enough_info = st.checkbox(
        "The frame does not provide enough information to answer the question.",
        key="not_enough_info",
    )

    # Submitting stores the answer via the callback
    submit_button = st.form_submit_button("Next", on_click=save_choice)