nam pham committed on
Commit
9faf7cc
·
1 Parent(s): ffa19f8

feat: fix load from huggingface

Browse files
Files changed (2) hide show
  1. app.py +27 -17
  2. data/annotated_data.json +0 -0
app.py CHANGED
@@ -146,6 +146,7 @@ dynamic_dataset = None
146
  def load_dataset():
147
  global dynamic_dataset
148
  try:
 
149
  with open("data/annotated_data.json", 'rt') as dataset:
150
  ANNOTATED_DATA = json.load(dataset)
151
  dynamic_dataset = DynamicDataset(ANNOTATED_DATA)
@@ -530,11 +531,25 @@ def convert_hf_dataset_to_ner_format(dataset):
530
 
531
  return converted_data
532
 
533
- def load_from_huggingface(dataset_name: str, split: str = "all"):
534
  """Load dataset from Hugging Face Hub"""
535
  try:
536
- dataset = load_dataset(dataset_name, split=split)
537
- converted_data = convert_hf_dataset_to_ner_format(dataset)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
 
539
  # Save the converted data
540
  os.makedirs("data", exist_ok=True)
@@ -543,7 +558,8 @@ def load_from_huggingface(dataset_name: str, split: str = "all"):
543
 
544
  return f"Successfully loaded and converted dataset: {dataset_name}"
545
  except Exception as e:
546
- return f"Error loading dataset: {str(e)}"
 
547
 
548
  def load_from_local_file(file_path: str, file_format: str = "json"):
549
  """Load and convert data from local file in various formats"""
@@ -891,14 +907,7 @@ with gr.Blocks() as demo:
891
  placeholder="Enter dataset name (e.g., conll2003)",
892
  scale=3
893
  )
894
- dataset_split = gr.Dropdown(
895
- choices=["train", "validation", "test"],
896
- value="train",
897
- label="Dataset Split",
898
- scale=2
899
- )
900
  load_dataset_btn = gr.Button("Load Dataset", scale=1)
901
- hf_status = gr.Textbox(label="Dataset Loading Status")
902
 
903
  bar = gr.Slider(
904
  minimum=0,
@@ -1002,16 +1011,17 @@ with gr.Blocks() as demo:
1002
  outputs=[inp_box, bar]
1003
  )
1004
 
1005
- def load_hf_dataset(name, split):
1006
- status = load_from_huggingface(name, split)
 
1007
  if "Successfully" in status:
1008
- return load_dataset(), status
1009
- return [status], 0, 0, status
1010
 
1011
  load_dataset_btn.click(
1012
  fn=load_hf_dataset,
1013
- inputs=[dataset_name, dataset_split],
1014
- outputs=[inp_box, bar, hf_status]
1015
  )
1016
 
1017
  apply_btn.click(fn=update_example, inputs=inp_box, outputs=inp_box)
 
146
  def load_dataset():
147
  global dynamic_dataset
148
  try:
149
+ print('load_dataset')
150
  with open("data/annotated_data.json", 'rt') as dataset:
151
  ANNOTATED_DATA = json.load(dataset)
152
  dynamic_dataset = DynamicDataset(ANNOTATED_DATA)
 
531
 
532
  return converted_data
533
 
534
+ def load_from_huggingface(dataset_name: str):
535
  """Load dataset from Hugging Face Hub"""
536
  try:
537
+ # Download the JSON file from Hugging Face
538
+ import requests
539
+ import json
540
+
541
+ # Construct the raw URL for the JSON file
542
+ raw_url = f"https://huggingface.co/datasets/{dataset_name}/raw/main/annotated_data.json"
543
+
544
+ # Download the file
545
+ response = requests.get(raw_url)
546
+ if response.status_code == 200:
547
+ print('response status', response.status_code)
548
+ print('response', response.text)
549
+ dataset = json.loads(response.text)
550
+ converted_data = dataset # Data is already in the correct format
551
+ else:
552
+ raise Exception(f"Failed to download dataset: {response.status_code}")
553
 
554
  # Save the converted data
555
  os.makedirs("data", exist_ok=True)
 
558
 
559
  return f"Successfully loaded and converted dataset: {dataset_name}"
560
  except Exception as e:
561
+ error_msg = f"Error loading dataset: {str(e)}"
562
+ return error_msg
563
 
564
  def load_from_local_file(file_path: str, file_format: str = "json"):
565
  """Load and convert data from local file in various formats"""
 
907
  placeholder="Enter dataset name (e.g., conll2003)",
908
  scale=3
909
  )
 
 
 
 
 
 
910
  load_dataset_btn = gr.Button("Load Dataset", scale=1)
 
911
 
912
  bar = gr.Slider(
913
  minimum=0,
 
1011
  outputs=[inp_box, bar]
1012
  )
1013
 
1014
+ def load_hf_dataset(name):
1015
+ status = load_from_huggingface(name)
1016
+ print('status', status)
1017
  if "Successfully" in status:
1018
+ return load_dataset()
1019
+ return [("Error loading dataset: " + status, None)], gr.update(value=0, maximum=1)
1020
 
1021
  load_dataset_btn.click(
1022
  fn=load_hf_dataset,
1023
+ inputs=[dataset_name],
1024
+ outputs=[inp_box, bar]
1025
  )
1026
 
1027
  apply_btn.click(fn=update_example, inputs=inp_box, outputs=inp_box)
data/annotated_data.json CHANGED
The diff for this file is too large to render. See raw diff