data-labeler / app.py
osbm's picture
refactor update_vote function to streamline image voting process and improve login handling
9491399
raw
history blame
4.23 kB
import os
import streamlit as st
import pandas as pd
import numpy as np
import huggingface_hub as hfh
import requests
# Local scratch folder where each voter's CSV is stored after download.
os.makedirs("labels", exist_ok=True)

# Accounts offered in the login form: five named voters followed by five
# numbered volunteer accounts (volunteer-1 .. volunteer-5).
voters = [
    "osman",
    "eren",
    "robin",
    "mira",
    "bilal",
    *(f"volunteer-{number}" for number in range(1, 6)),
]

# Hub client shared by the helpers below; the token is read from the
# "hf_token" environment variable (None when the variable is unset).
api = hfh.HfApi(token=os.environ.get("hf_token"))
def get_list_of_images():
    """Return the paths of all ``.png`` / ``.jpg`` files in the image dataset.

    Walks the ``aifred-smart-life-coach/capstone-images`` dataset repository
    recursively and keeps only entries whose path ends with one of the two
    suffixes (the comparison is case-sensitive).
    """
    image_suffixes = (".png", ".jpg")
    repo_entries = api.list_repo_tree(
        repo_id="aifred-smart-life-coach/capstone-images",
        repo_type="dataset",
        recursive=True,
    )
    return [
        entry.path
        for entry in repo_entries
        if entry.path.endswith(image_suffixes)
    ]
def get_one_from_queue(voter: str):
    """Return one image path that ``voter`` has not voted on yet.

    The voter's past votes are downloaded from ``{voter}.csv`` in the
    ``aifred-smart-life-coach/labels`` dataset and cached under ``labels/``.
    Every value in that file's ``image_path`` column counts as already voted.

    Args:
        voter: Name of the voter; also the stem of the CSV file to fetch.

    Returns:
        The path of an image that is not yet in the voter's CSV, or ``False``
        when no image is left.

    Raises:
        requests.HTTPError: If the download fails with a status other than 404.
    """
    url = f"https://huggingface.co/datasets/aifred-smart-life-coach/labels/raw/main/{voter}.csv"
    file_path = f"labels/{voter}.csv"
    req = requests.get(url, timeout=30)

    voted_images = set()
    num_past_votes = 0
    # A 404 means the voter has no CSV yet, i.e. nothing has been voted on.
    # Any other error status must not be written to disk and parsed as CSV.
    if req.status_code != 404:
        req.raise_for_status()
        with open(file_path, "wb") as file:
            file.write(req.content)
        try:
            df = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            # A zero-byte file carries no votes either.
            df = pd.DataFrame(columns=["image_path"])
        print(df)
        num_past_votes = df.shape[0]
        voted_images = set(df["image_path"].tolist())
    print("num_past_votes", num_past_votes)

    list_of_images = get_list_of_images()
    print("list_of_images", len(list_of_images))

    # Sort so the same image is chosen on every rerun and in every process;
    # plain set iteration order depends on per-process string hashing.
    images_not_voted = sorted(set(list_of_images) - voted_images)
    print("images_not_voted", len(images_not_voted))
    return images_not_voted[0] if images_not_voted else False
def update_vote(
    voter: str,
    image: str,
    healthiness: int,
    fat_level: int,
    muscle_level: int,
):
    """Append one vote to the voter's CSV and upload it to the labels dataset.

    The current ``{voter}.csv`` is downloaded from the
    ``aifred-smart-life-coach/labels`` dataset, a row for ``image`` is added,
    and the file is written to ``labels/{voter}.csv`` and uploaded back under
    the same name.

    Args:
        voter: Name of the voter; also the stem of the CSV file.
        image: Path of the image that was voted on.
        healthiness: Value stored in the ``healthiness`` column.
        fat_level: Value stored in the ``fat_level`` column.
        muscle_level: Value stored in the ``muscle_level`` column.

    Raises:
        requests.HTTPError: If the download fails with a status other than 404.
    """
    url = f"https://huggingface.co/datasets/aifred-smart-life-coach/labels/raw/main/{voter}.csv"
    file_path = f"labels/{voter}.csv"
    req = requests.get(url, timeout=30)

    frames = []
    # A 404 means this is the voter's first vote, so there is nothing to
    # extend. Any other error status must not be saved and parsed as CSV.
    if req.status_code != 404:
        req.raise_for_status()
        with open(file_path, "wb") as file:
            file.write(req.content)
        try:
            past_votes = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            past_votes = None  # zero-byte file: no earlier votes
        if past_votes is not None:
            print(past_votes)
            frames.append(past_votes)

    new_row = {
        "image_path": image,
        "healthiness": healthiness,
        "fat_level": fat_level,
        "muscle_level": muscle_level,
    }
    frames.append(pd.DataFrame([new_row]))

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat(frames, ignore_index=True)
    df.to_csv(file_path, index=False)

    # HfApi has no push_to_hub method; upload_file commits a single file.
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=f"{voter}.csv",
        repo_id="aifred-smart-life-coach/labels",
        repo_type="dataset",
        commit_message=f"Voted for {image}",
    )
# Streamlit reruns this script on every interaction, so the login flag is
# kept in the session state and only seeded on the first run.
if 'loggedin' not in st.session_state:
    st.session_state['loggedin'] = 'false'

# Login form: pick a voter and enter the shared password.
with st.form("login"):
    username = st.selectbox("Select voter", voters)
    password = st.text_input("Password (get password from [email protected])", type="password")
    submitted = st.form_submit_button("Login")

if submitted or st.session_state['loggedin'] == 'true':
    # The password is checked on every run, including runs after login.
    if password != os.environ.get("app_password"):
        st.error("The password you entered is incorrect")
        st.stop()

    st.success(f"Welcome, {username}")
    st.write("You are now logged in")
    st.session_state['loggedin'] = 'true'

    # Voting form: show one image the voter has not voted on yet.
    with st.form("images"):
        image_path = get_one_from_queue(username)
        if not image_path:
            st.write("You have voted for all the images")
            st.stop()

        path = hfh.hf_hub_download(
            repo_id="aifred-smart-life-coach/capstone-images",
            repo_type="dataset",
            filename=image_path,
            token=os.environ.get("hf_token"),
        )
        st.image(path, width=300)

        # NOTE(review): the gender selection is shown but never passed to
        # update_vote, so it is not stored anywhere in this script.
        gender_options = ["Male", "Female", "Non-defining"]
        gender = st.selectbox("Gender", gender_options)
        healthiness = st.slider("How healthy is this picture?", 0, 100, 50)
        fat_level = st.slider("How fat is this picture?", 0, 100, 50)
        muscle_level = st.slider("How muscular is this picture?", 0, 100, 50)

        # Every form must have a submit button.
        submitted_second = st.form_submit_button("Submit")

    if submitted_second:
        # Store the vote and upload the voter's CSV.
        update_vote(username, image_path, healthiness, fat_level, muscle_level)
        st.write("Vote submitted")

st.write("Outside the form")