Spaces:
Sleeping
Sleeping
File size: 5,676 Bytes
d9cbdf1 138d0d5 d9cbdf1 8823c0c 138d0d5 57bf5d5 8823c0c 138d0d5 fadec32 138d0d5 ff62d04 8823c0c 138d0d5 d88bb9d 138d0d5 ccaeded 138d0d5 8823c0c 138d0d5 8823c0c 138d0d5 8823c0c 138d0d5 fadec32 138d0d5 fadec32 138d0d5 8823c0c 138d0d5 8823c0c 138d0d5 8823c0c 138d0d5 d88bb9d 138d0d5 8823c0c 31b5d12 138d0d5 31b5d12 8823c0c ccaeded 138d0d5 8823c0c 138d0d5 8823c0c 60f83a0 19ec3d7 ccaeded 19ec3d7 8823c0c 60f83a0 8823c0c ccaeded 60f83a0 ccaeded 19ec3d7 8823c0c 138d0d5 8823c0c 31b5d12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import streamlit as st
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
# Constants
# Hugging Face Hub repository id; passed with repo_type="dataset" to the
# download and upload calls in this file.
HF_REPO = "giobin/MAIA_human_assessment_annotations"
# Name of the annotations CSV inside that repository (downloaded at startup,
# overwritten on upload).
CSV_FILENAME = "user_selections.csv"
# Function to assign samples to users
def assign_samples(csv_path, samples_per_group=5):
    """Build the per-annotator sample assignment from the samples CSV.

    The CSV is expected to contain at least the columns ``pool_pos`` and
    ``question_category``. For each pool position (1, 2, 3) the first
    ``samples_per_group`` rows whose category does not end in ``"_B"`` are
    selected, and every annotator of a group receives that same selection.

    Args:
        csv_path: Path (or any source accepted by ``pandas.read_csv``) of
            the samples file.
        samples_per_group: Maximum number of rows kept per pool position.
            Defaults to 5.

    Returns:
        A dict mapping annotator name to the DataFrame of assigned samples.
        Annotators of the same group share the same DataFrame object.
    """
    df = pd.read_csv(csv_path)

    # na=False: a missing category counts as "does not end in _B". Without
    # it, object-dtype columns yield NaN here and the `~` below raises.
    keep_category = ~df["question_category"].str.endswith("_B", na=False)

    groups = {
        position: df[(df["pool_pos"] == position) & keep_category].head(samples_per_group)
        for position in (1, 2, 3)
    }

    return {
        "Bernardo": groups[1],
        "Alessandro": groups[1],
        "Alessio": groups[1],
        "Lenci": groups[2],
        "Lucia": groups[2],
        "Davide": groups[2],
        "Giovanni": groups[3],
        "Raffaella": groups[3],
    }
# Function to load existing annotations from Hugging Face Hub
def load_existing_annotations():
    """Download and load the annotations CSV from the Hugging Face Hub.

    Returns:
        The DataFrame read from ``CSV_FILENAME`` in the ``HF_REPO`` dataset
        repository. If the download or parsing fails for any reason, an
        empty DataFrame with the columns ``username`` and ``id`` is returned
        instead, so the app can still start.
    """
    try:
        file_path = hf_hub_download(
            HF_REPO,
            CSV_FILENAME,
            repo_type="dataset",
            token=st.secrets["HF_TOKEN"],
        )
        return pd.read_csv(file_path)
    except Exception as e:
        # Deliberately best-effort, but report the cause instead of hiding
        # it: a failed download is otherwise indistinguishable from
        # "nothing has been annotated yet".
        print(f"Error loading annotations from HF: {e}")
        return pd.DataFrame(columns=["username", "id"])
# Load the sample assignments and the annotations already stored on the Hub
csv_file = "static/mc.csv"
assignments = assign_samples(csv_file)
existing_annotations = load_existing_annotations()

# Valid users are exactly the annotators that have an assignment
valid_users = list(assignments)

# Seed session state; keys that are already present are left untouched
for _state_key, _initial_value in (("username", None), ("index", 0), ("results", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# User selection
def update_name():
    """Callback of the name form: store the chosen user and restart at sample 0."""
    state = st.session_state
    # "selected_user" is the key of the name selectbox.
    state.username = state.selected_user
    state.index = 0  # Reset progress
# Until a name has been chosen, show only the name form and halt the script
if st.session_state.username is None:
    with st.form("user_form"):
        st.write("### Select Your Name")
        selected_user = st.selectbox("Choose your name:", valid_users, key="selected_user")
        submit_button = st.form_submit_button("Start", on_click=update_name)
    st.stop()

# Retrieve assigned dataset and filter out already labeled samples
full_dataset = assignments[st.session_state.username].reset_index(drop=True)
_rows_of_user = existing_annotations["username"] == st.session_state.username
user_labeled_ids = existing_annotations.loc[_rows_of_user, "id"].tolist()
_already_labeled = full_dataset["id"].isin(user_labeled_ids)
dataset = full_dataset[~_already_labeled].reset_index(drop=True)

# Nothing left to label: show the completion message and halt
_remaining = len(dataset) - st.session_state.index
if _remaining <= 0:
    st.write("### Great! You have completed your assignment. π")
    st.stop()
# Function to push updated annotations to Hugging Face Hub
def push_to_hf_hub(csv_path):
    """Upload the CSV at ``csv_path`` to the Hub as ``CSV_FILENAME``.

    The dataset repository ``HF_REPO`` is created first if it does not
    exist. Any failure is printed rather than raised.
    """
    api = HfApi()
    try:
        hf_token = st.secrets["HF_TOKEN"]
        api.create_repo(HF_REPO, repo_type="dataset", exist_ok=True, token=hf_token)
        api.upload_file(
            path_or_fileobj=csv_path,
            path_in_repo=CSV_FILENAME,
            repo_id=HF_REPO,
            repo_type="dataset",
            token=hf_token,
        )
        print(f"Dataset updated: https://huggingface.co/datasets/{HF_REPO}")
    except Exception as err:
        print(f"Error pushing to HF: {err}")
# Function to save user choice
def save_choice():
    """Callback of the annotation form: record the answer and advance.

    Reads the radio value (key ``selected_answer``) and the checkbox value
    (key ``not_enough_info``) from session state, appends one result row for
    the sample at the current index and moves on to the next sample.

    Once the index reaches the end of ``dataset``, the results of this
    session are merged with ``existing_annotations`` (the last row wins per
    ``(username, id)``), written to ``user_selections.csv`` and uploaded
    with ``push_to_hf_hub``.
    """
    sample = dataset.iloc[st.session_state.index]
    selected_answer = st.session_state.get("selected_answer", None)
    not_enough_info = st.session_state.get("not_enough_info", False)

    if selected_answer is not None:
        st.session_state.results.append({
            "username": st.session_state.username,
            "id": sample["id"],
            "video_id": sample["video_id"],
            "answer1": sample["answer1"],
            "answer2": sample["answer2"],
            "selected_answer": selected_answer,
            "target": sample["target"],
            "not_enough_info": not_enough_info,
        })
        st.session_state.index += 1
        # The checkbox is keyed, so its value would otherwise persist into
        # the next sample and silently flag it as well. Resetting widget
        # state is allowed here because callbacks run before the widgets
        # are instantiated for the next script run.
        st.session_state["not_enough_info"] = False

    # Save results and push to Hugging Face Hub if all samples are labeled
    if st.session_state.index >= len(dataset):
        st.write("### Great! You have completed your assignment. π")
        result_df = pd.DataFrame(st.session_state.results)
        csv_path = "user_selections.csv"
        if not existing_annotations.empty:
            # Keep previously stored rows; on a (username, id) clash the row
            # from this session replaces the stored one.
            result_df = pd.concat([existing_annotations, result_df]).drop_duplicates(
                subset=["username", "id"], keep="last"
            )
        result_df.to_csv(csv_path, index=False)
        push_to_hf_hub(csv_path)
        st.stop()
# Sample to annotate on this run
sample = dataset.iloc[st.session_state.index]


def _format_option(option):
    """Label for the radio options: 0 shows answer1 as A, anything else answer2 as B."""
    if option == 0:
        return f"A: {sample['answer1']}"
    return f"B: {sample['answer2']}"


# Page title and user information
st.markdown(
    "<h1 style='text-align: center; font-size: 50px;'>MAIA Sample</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    f"<h3 style='text-align: center;'>User: {st.session_state.username}</h3>",
    unsafe_allow_html=True,
)
st.write("\n\n")

# Instructions
st.markdown("""
### Instructions:
- Look at the video thumbnail; do not play it.
- Select the correct description (A or B).
- If the frame does not provide enough information to answer the question, select the checkbox.
- Click 'Next' to proceed.
""")
st.write("---")

# Display video thumbnail
st.video(sample["video_url"])

# Form for user input; the widget values are read back by key in save_choice
with st.form("annotation_form"):
    # Exclusive choice between A and B
    selected_answer = st.radio(
        "Choose the correct description:",
        options=[0, 1],
        format_func=_format_option,
        key="selected_answer",
    )

    # Independent checkbox for insufficient information
    not_enough_info = st.checkbox(
        "The frame does not provide enough information to answer the question.",
        key="not_enough_info",
    )

    # Submit button
    submit_button = st.form_submit_button("Next", on_click=save_choice)
|