davanstrien's picture
davanstrien HF Staff
draft app
a2bb2cd
raw
history blame
4.71 kB
import gradio as gr
from httpx import Client
import pandas as pd
from datasets import Dataset
client = Client()
from io import StringIO
from datasets import ClassLabel
from datasets import Image
# Connection details submitted through the UI. This is a module-level dict, so
# it is shared by every callback running in this process.
USER_DATA = {}


def update_user_data(api_key, space_url, hub_api_key, hub_dataset_id):
    """Store the Label Studio and Hub details entered in the UI.

    Values are normalised before being stored: surrounding whitespace is
    removed (pasted tokens often carry a trailing newline) and any trailing
    slash is dropped from the Space URL so it can be joined with an
    ``/api/...`` path without producing a double slash.

    Args:
        api_key: Label Studio API token.
        space_url: Base URL of the Label Studio Space.
        hub_api_key: Hugging Face Hub token used for pushing the dataset.
        hub_dataset_id: Target dataset repo id on the Hub.
    """

    def _clean(value):
        # Treat a missing value as empty so later truthiness checks still work.
        return "" if value is None else str(value).strip()

    USER_DATA["api_key"] = _clean(api_key)
    USER_DATA["space_url"] = _clean(space_url).rstrip("/")
    USER_DATA["hub_api_key"] = _clean(hub_api_key)
    USER_DATA["hub_dataset_id"] = _clean(hub_dataset_id)
def check_user_data():
    """Return True when both Label Studio details are present and non-empty.

    Only the ``api_key`` and ``space_url`` entries of ``USER_DATA`` are
    inspected; the Hub-related entries are not checked here.
    """
    required_keys = ("api_key", "space_url")
    return all(USER_DATA.get(key) for key in required_keys)
# def list_projects():
# headers = {"Authorization": f'Token {USER_DATA["api_key"]}'}
# resp = client.get(
# "https://davanstrien-label-studio.hf.space/api/projects/", headers=headers
# )
# return resp.json()
# def get_column_names():
# headers = {"Authorization": f'Token {USER_DATA["api_key"]}'}
# print(headers)
# # resp = client.get(
# # "http://davanstrien-label-studio.hf.space/api/projects/1/export?exportType=CSV",
# # headers=headers,
# # )
# resp = requests.get(
# "http://davanstrien-label-studio.hf.space/api/projects/1/export?exportType=CSV",
# headers=headers,
# )
# return pd.read_csv(StringIO(resp.text)).columns.tolist()
def push_annotations_to_hub(project_id, input_column, input_column_type, label_column):
    """Export a Label Studio project as CSV and push it to the Hub as a dataset.

    The CSV export of the project is downloaded from the configured Space,
    loaded into a ``datasets.Dataset``, the label column is cast to
    ``ClassLabel`` and, for image inputs, the input column is cast to
    ``Image``. The dataset is then pushed to the configured Hub repo.

    Args:
        project_id: Label Studio project id (the UI supplies a number, hence
            the ``int()`` conversion).
        input_column: Name of the column holding the annotated input.
        input_column_type: ``"text"`` or ``"image"``; only ``"image"``
            triggers an extra cast.
        label_column: Name of the column holding the chosen label.

    Returns:
        The first five rows of the pushed dataset as a pandas DataFrame,
        used as a preview in the UI.

    Raises:
        gr.Error: If required details were not submitted, the export request
            fails, the export is empty, or a requested column is missing.
    """
    # Local import: the module only imports ``Client`` by name from httpx.
    import httpx

    # Fail early with a readable message instead of a bare KeyError.
    required = ("api_key", "space_url", "hub_api_key", "hub_dataset_id")
    missing = [key for key in required if not USER_DATA.get(key)]
    if missing:
        raise gr.Error(
            f"Please submit your details first (missing: {', '.join(missing)})"
        )

    # A trailing slash on the Space URL would otherwise yield "//api/...".
    base_url = USER_DATA["space_url"].strip().rstrip("/")
    headers = {"Authorization": f'Token {USER_DATA["api_key"]}'}
    try:
        resp = client.get(
            f"{base_url}/api/projects/{int(project_id)}/export?exportType=CSV",
            headers=headers,
        )
        # Without this an error body (401/404 page) would be parsed as CSV.
        resp.raise_for_status()
    except httpx.HTTPError as err:
        raise gr.Error(f"Could not export project from Label Studio: {err}") from err

    try:
        df = pd.read_csv(StringIO(resp.text))
    except pd.errors.EmptyDataError as err:
        raise gr.Error("The Label Studio export contained no data") from err
    print(df.head(1))

    for column in (input_column, label_column):
        if column not in df.columns:
            raise gr.Error(
                f"Column {column!r} not found in export; "
                f"available columns: {', '.join(map(str, df.columns))}"
            )

    # Rows without an annotation come through as NaN, which is not a valid
    # class name, so leave them out of the label set.
    labels = df[label_column].dropna().unique().tolist()
    ds = Dataset.from_pandas(df)
    ds = ds.cast_column(label_column, ClassLabel(names=labels))
    if input_column_type == "image":
        ds = ds.cast_column(input_column, Image())
    ds.push_to_hub(USER_DATA["hub_dataset_id"], token=USER_DATA["hub_api_key"])
    return ds.to_pandas().head(5)
# UI layout: two columns of connection details, a row of export options and a
# preview table that is filled with the first rows of the pushed dataset.
with gr.Blocks() as demo:
    gr.Markdown("# Push label studio datasets to the hub")
    gr.Markdown(
        "This is a proof of concept app which provides a GUI for exporting data from Label Studio and pushing the loaded dataset to the Hugging Face Hub"
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                gr.Markdown("## Label Studio details")
            with gr.Row():
                API_KEY = gr.Textbox(
                    type="password",
                    label="Label Studio API Key",
                )
            with gr.Row():
                with gr.Row():
                    gr.Markdown(
                        "Space URL, this can be found by clicking on the three dots button on your space and copying the URL shown after clicking the Embed Space button"
                    )
                with gr.Row():
                    # The example is shown as a placeholder rather than as the
                    # initial value, so it is never submitted as a real URL.
                    SPACE_URL = gr.Textbox(
                        label="Space URL",
                        placeholder="e.g. https://davanstrien-label-studio.hf.space/",
                    )
        with gr.Column():
            gr.Markdown("## Hub Dataset info")
            gr.Markdown(
                """Enter a Hub API key with write access and the name you would like to use for your dataset"""
            )
            HUB_API_KEY = gr.Textbox(
                type="password",
                label="Hub API Key",
            )
            with gr.Row():
                gr.Markdown("Name of the dataset you would like to create")
            with gr.Row():
                # Same as above: example text belongs in the placeholder.
                HUB_DATASET_ID = gr.Textbox(
                    label="Dataset name",
                    placeholder="e.g. davanstrien/dataset_name",
                )
    submit_button = gr.Button("Submit details")
    submit_button.click(
        update_user_data, [API_KEY, SPACE_URL, HUB_API_KEY, HUB_DATASET_ID]
    )
    with gr.Row():
        project_id = gr.Number(1, label="Project ID")
        input_column = gr.Textbox("text", type="text", label="Input column")
        input_column_type = gr.Dropdown(
            choices=["text", "image"], label="Input column type", value="text"
        )
        label_column = gr.Textbox("choice", type="text", label="Label column")
    push_button = gr.Button("Push annotations to Hub")
    with gr.Row():
        gr.Markdown("## Preview of your dataset")
    with gr.Row():
        preview = gr.DataFrame()
    # The returned DataFrame head is rendered in the preview table.
    push_button.click(
        push_annotations_to_hub,
        [
            project_id,
            input_column,
            input_column_type,
            label_column,
        ],
        preview,
    )
demo.launch(debug=True)