Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from httpx import Client | |
| import pandas as pd | |
| from datasets import Dataset | |
| client = Client() | |
| from io import StringIO | |
| from datasets import ClassLabel | |
| from datasets import Image | |
# Connection details submitted through the UI, keyed by field name.
# NOTE(review): this dict lives at module level, so every caller of this
# process reads and overwrites the same values -- confirm that sharing the
# credentials between sessions is acceptable for this deployment.
USER_DATA = {}


def _clean_field(value):
    """Return *value* as a string without surrounding whitespace ('' for None)."""
    return (value or "").strip()


def update_user_data(api_key, space_url, hub_api_key, hub_dataset_id):
    """Store the Label Studio and Hub details entered in the UI.

    Values are normalised before being stored so that later URL building and
    authentication do not fail on copy/paste artefacts:

    * surrounding whitespace is removed from every field;
    * trailing slashes are removed from ``space_url`` because the export URL
      is built as ``f"{space_url}/api/..."`` and would otherwise contain
      a double slash;
    * ``None`` is stored as an empty string.

    Args:
        api_key: Label Studio API token.
        space_url: Base URL of the Label Studio Space.
        hub_api_key: Hugging Face Hub token used for pushing the dataset.
        hub_dataset_id: Target dataset repository id on the Hub.
    """
    USER_DATA["api_key"] = _clean_field(api_key)
    USER_DATA["space_url"] = _clean_field(space_url).rstrip("/")
    USER_DATA["hub_api_key"] = _clean_field(hub_api_key)
    USER_DATA["hub_dataset_id"] = _clean_field(hub_dataset_id)
def check_user_data():
    """Report whether the Label Studio connection details are available.

    Returns:
        True when both the ``api_key`` and ``space_url`` entries of
        ``USER_DATA`` are present and truthy, otherwise False.
    """
    required_fields = ("api_key", "space_url")
    return all(USER_DATA.get(field) for field in required_fields)
| # def list_projects(): | |
| # headers = {"Authorization": f'Token {USER_DATA["api_key"]}'} | |
| # resp = client.get( | |
| # "https://davanstrien-label-studio.hf.space/api/projects/", headers=headers | |
| # ) | |
| # return resp.json() | |
| # def get_column_names(): | |
| # headers = {"Authorization": f'Token {USER_DATA["api_key"]}'} | |
| # print(headers) | |
| # # resp = client.get( | |
| # # "http://davanstrien-label-studio.hf.space/api/projects/1/export?exportType=CSV", | |
| # # headers=headers, | |
| # # ) | |
| # resp = requests.get( | |
| # "http://davanstrien-label-studio.hf.space/api/projects/1/export?exportType=CSV", | |
| # headers=headers, | |
| # ) | |
| # return pd.read_csv(StringIO(resp.text)).columns.tolist() | |
def push_annotations_to_hub(project_id, input_column, input_column_type, label_column):
    """Export a Label Studio project as CSV and push it to the Hub as a dataset.

    The CSV export of the project is downloaded from the configured Space,
    loaded into a ``Dataset``, the label column is cast to ``ClassLabel`` and,
    for image projects, the input column is cast to ``Image`` before the
    dataset is pushed to the configured Hub repository.

    Args:
        project_id: Numeric id of the Label Studio project to export.
        input_column: Name of the CSV column holding the annotated input.
        input_column_type: ``"text"`` or ``"image"``; only ``"image"`` triggers
            a cast of ``input_column``.
        label_column: Name of the CSV column holding the chosen label.

    Returns:
        A pandas DataFrame with the first five rows of the pushed dataset.

    Raises:
        gr.Error: If the connection details have not been submitted, the
            project id is empty, the export request does not return HTTP 200,
            or a required column is missing from the export.
    """
    # Fail with a readable message instead of a KeyError on USER_DATA.
    if not check_user_data():
        raise gr.Error(
            "Please submit your Label Studio API key and Space URL first."
        )
    if not USER_DATA.get("hub_dataset_id"):
        raise gr.Error("Please submit the name of the Hub dataset to create.")
    if project_id is None:
        raise gr.Error("Please enter a project ID.")

    headers = {"Authorization": f'Token {USER_DATA["api_key"]}'}
    # Drop trailing slashes so the joined URL never contains "//api".
    base_url = USER_DATA["space_url"].rstrip("/")
    resp = client.get(
        f"{base_url}/api/projects/{int(project_id)}/export?exportType=CSV",
        headers=headers,
    )
    # An error page must not be handed to the CSV parser.
    if resp.status_code != 200:
        raise gr.Error(
            f"Label Studio export failed with HTTP status {resp.status_code}."
        )

    df = pd.read_csv(StringIO(resp.text))
    print(df.head(1))

    required_columns = [label_column]
    if input_column_type == "image":
        required_columns.append(input_column)
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise gr.Error(
            f"Column(s) {missing} not found in export; "
            f"available columns: {df.columns.tolist()}"
        )

    # Rows without an annotation are NaN; they must not become a class name.
    labels = df[label_column].dropna().unique().tolist()
    ds = Dataset.from_pandas(df)
    ds = ds.cast_column(label_column, ClassLabel(names=labels))
    if input_column_type == "image":
        ds = ds.cast_column(input_column, Image())
    ds.push_to_hub(USER_DATA["hub_dataset_id"], token=USER_DATA["hub_api_key"])
    return ds.to_pandas().head(5)
# Build the UI: a form for the Label Studio / Hub details, a form describing
# the project to export, and a preview table for the pushed dataset.
with gr.Blocks() as demo:
    gr.Markdown("# Push label studio datasets to the hub")
    gr.Markdown(
        "This is a proof of concept app which provides a GUI for exporting data from Label Studio and pushing the loaded dataset to the Hugging Face Hub"
    )
    with gr.Row():
        # Left column: how to reach the Label Studio instance.
        with gr.Column():
            with gr.Row():
                gr.Markdown("## Label Studio details")
            with gr.Row():
                API_KEY = gr.Textbox(
                    type="password",
                    label="Label Studio API Key",
                )
            with gr.Row():
                with gr.Row():
                    gr.Markdown(
                        "Space URL, this can be found by clicking on the three dots button on your space and copying the URL shown after clicking the Embed Space button"
                    )
                with gr.Row():
                    # The example is a placeholder, not an initial value, so
                    # it is never submitted as if it were the real URL.
                    SPACE_URL = gr.Textbox(
                        label="Space URL",
                        placeholder="e.g. https://davanstrien-label-studio.hf.space/",
                    )
        # Right column: where the dataset is pushed on the Hub.
        with gr.Column():
            gr.Markdown("## Hub Dataset info")
            gr.Markdown(
                """Enter a Hub API key with write access and the name you would like to use for your dataset"""
            )
            HUB_API_KEY = gr.Textbox(
                type="password",
                label="Hub API Key",
            )
            with gr.Row():
                gr.Markdown("Name of the dataset you would like to create")
            with gr.Row():
                # Placeholder shows a dataset id example rather than a URL.
                HUB_DATASET_ID = gr.Textbox(
                    label="Dataset name",
                    placeholder="e.g. davanstrien/dataset_name",
                )
    # Stores the four fields above for later use by the push handler.
    submit_button = gr.Button("Submit details")
    submit_button.click(
        update_user_data, [API_KEY, SPACE_URL, HUB_API_KEY, HUB_DATASET_ID]
    )
    with gr.Row():
        project_id = gr.Number(1, label="Project ID")
        input_column = gr.Textbox("text", type="text", label="Input column")
        input_column_type = gr.Dropdown(
            choices=["text", "image"], label="Input column type", value="text"
        )
        label_column = gr.Textbox("choice", type="text", label="Label column")
    push_button = gr.Button("Push annotations to Hub")
    with gr.Row():
        gr.Markdown("## Preview of your dataset")
    with gr.Row():
        preview = gr.DataFrame()
    # The handler's returned DataFrame is rendered in the preview table.
    push_button.click(
        push_annotations_to_hub,
        [
            project_id,
            input_column,
            input_column_type,
            label_column,
        ],
        preview,
    )
demo.launch(debug=True)