Spaces:
Running
Running
nam pham
committed on
Commit
·
9faf7cc
1
Parent(s):
ffa19f8
feat: fix load from huggingface
Browse files
- app.py +27 -17
- data/annotated_data.json +0 -0
app.py
CHANGED
@@ -146,6 +146,7 @@ dynamic_dataset = None
|
|
146 |
def load_dataset():
|
147 |
global dynamic_dataset
|
148 |
try:
|
|
|
149 |
with open("data/annotated_data.json", 'rt') as dataset:
|
150 |
ANNOTATED_DATA = json.load(dataset)
|
151 |
dynamic_dataset = DynamicDataset(ANNOTATED_DATA)
|
@@ -530,11 +531,25 @@ def convert_hf_dataset_to_ner_format(dataset):
|
|
530 |
|
531 |
return converted_data
|
532 |
|
533 |
-
def load_from_huggingface(dataset_name: str
|
534 |
"""Load dataset from Hugging Face Hub"""
|
535 |
try:
|
536 |
-
|
537 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
|
539 |
# Save the converted data
|
540 |
os.makedirs("data", exist_ok=True)
|
@@ -543,7 +558,8 @@ def load_from_huggingface(dataset_name: str, split: str = "all"):
|
|
543 |
|
544 |
return f"Successfully loaded and converted dataset: {dataset_name}"
|
545 |
except Exception as e:
|
546 |
-
|
|
|
547 |
|
548 |
def load_from_local_file(file_path: str, file_format: str = "json"):
|
549 |
"""Load and convert data from local file in various formats"""
|
@@ -891,14 +907,7 @@ with gr.Blocks() as demo:
|
|
891 |
placeholder="Enter dataset name (e.g., conll2003)",
|
892 |
scale=3
|
893 |
)
|
894 |
-
dataset_split = gr.Dropdown(
|
895 |
-
choices=["train", "validation", "test"],
|
896 |
-
value="train",
|
897 |
-
label="Dataset Split",
|
898 |
-
scale=2
|
899 |
-
)
|
900 |
load_dataset_btn = gr.Button("Load Dataset", scale=1)
|
901 |
-
hf_status = gr.Textbox(label="Dataset Loading Status")
|
902 |
|
903 |
bar = gr.Slider(
|
904 |
minimum=0,
|
@@ -1002,16 +1011,17 @@ with gr.Blocks() as demo:
|
|
1002 |
outputs=[inp_box, bar]
|
1003 |
)
|
1004 |
|
1005 |
-
def load_hf_dataset(name
|
1006 |
-
status = load_from_huggingface(name
|
|
|
1007 |
if "Successfully" in status:
|
1008 |
-
return load_dataset()
|
1009 |
-
return [status
|
1010 |
|
1011 |
load_dataset_btn.click(
|
1012 |
fn=load_hf_dataset,
|
1013 |
-
inputs=[dataset_name
|
1014 |
-
outputs=[inp_box, bar
|
1015 |
)
|
1016 |
|
1017 |
apply_btn.click(fn=update_example, inputs=inp_box, outputs=inp_box)
|
|
|
146 |
def load_dataset():
|
147 |
global dynamic_dataset
|
148 |
try:
|
149 |
+
print('load_dataset')
|
150 |
with open("data/annotated_data.json", 'rt') as dataset:
|
151 |
ANNOTATED_DATA = json.load(dataset)
|
152 |
dynamic_dataset = DynamicDataset(ANNOTATED_DATA)
|
|
|
531 |
|
532 |
return converted_data
|
533 |
|
534 |
+
def load_from_huggingface(dataset_name: str):
|
535 |
"""Load dataset from Hugging Face Hub"""
|
536 |
try:
|
537 |
+
# Download the JSON file from Hugging Face
|
538 |
+
import requests
|
539 |
+
import json
|
540 |
+
|
541 |
+
# Construct the raw URL for the JSON file
|
542 |
+
raw_url = f"https://huggingface.co/datasets/{dataset_name}/raw/main/annotated_data.json"
|
543 |
+
|
544 |
+
# Download the file
|
545 |
+
response = requests.get(raw_url)
|
546 |
+
if response.status_code == 200:
|
547 |
+
print('response status', response.status_code)
|
548 |
+
print('response', response.text)
|
549 |
+
dataset = json.loads(response.text)
|
550 |
+
converted_data = dataset # Data is already in the correct format
|
551 |
+
else:
|
552 |
+
raise Exception(f"Failed to download dataset: {response.status_code}")
|
553 |
|
554 |
# Save the converted data
|
555 |
os.makedirs("data", exist_ok=True)
|
|
|
558 |
|
559 |
return f"Successfully loaded and converted dataset: {dataset_name}"
|
560 |
except Exception as e:
|
561 |
+
error_msg = f"Error loading dataset: {str(e)}"
|
562 |
+
return error_msg
|
563 |
|
564 |
def load_from_local_file(file_path: str, file_format: str = "json"):
|
565 |
"""Load and convert data from local file in various formats"""
|
|
|
907 |
placeholder="Enter dataset name (e.g., conll2003)",
|
908 |
scale=3
|
909 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
910 |
load_dataset_btn = gr.Button("Load Dataset", scale=1)
|
|
|
911 |
|
912 |
bar = gr.Slider(
|
913 |
minimum=0,
|
|
|
1011 |
outputs=[inp_box, bar]
|
1012 |
)
|
1013 |
|
1014 |
+
def load_hf_dataset(name):
|
1015 |
+
status = load_from_huggingface(name)
|
1016 |
+
print('status', status)
|
1017 |
if "Successfully" in status:
|
1018 |
+
return load_dataset()
|
1019 |
+
return [("Error loading dataset: " + status, None)], gr.update(value=0, maximum=1)
|
1020 |
|
1021 |
load_dataset_btn.click(
|
1022 |
fn=load_hf_dataset,
|
1023 |
+
inputs=[dataset_name],
|
1024 |
+
outputs=[inp_box, bar]
|
1025 |
)
|
1026 |
|
1027 |
apply_btn.click(fn=update_example, inputs=inp_box, outputs=inp_box)
|
data/annotated_data.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|