bias-test-gpt-pairs

Sleeping

App Files Files Community

RKocielnik committed on Jan 26

Commit

af9829f

verified ·

1 Parent(s): b829dd9

update sentence manager to new huggingface_hub

Browse files

Files changed (1) hide show

mgr_sentences.py +27 -3

mgr_sentences.py CHANGED Viewed

@@ -4,6 +4,7 @@ import re
 import pandas as pd
 import numpy as np
 import glob
 import huggingface_hub
 print("hfh", huggingface_hub.__version__)
 from huggingface_hub import hf_hub_download, upload_file, delete_file, snapshot_download, list_repo_files, dataset_info
@@ -64,6 +65,18 @@ def saveSentences(sentences_df):
     print(f"Org size: {grp_saved_df.shape[0]}, Mrg size: {new_grp_df.shape[0]}")
     store_group_sentences(filename, new_grp_df)
 # https://huggingface.co/spaces/elonmuskceo/persistent-data/blob/main/app.py
 def get_sentence_csv(file_path: str):
@@ -82,11 +95,22 @@ def get_sentence_csv(file_path: str):
     # file not found
     print(f"file not found, probably: {e}")
-  files=glob.glob(f"./{LOCAL_DATA_DIRNAME}/", recursive=True)
   print("Files glob: "+', '.join(files))
   #print("Save file:" + str(os.path.basename(file_path)))
-  df = pd.read_csv(os.path.join(LOCAL_DATA_DIRNAME, os.path.basename(file_path)), encoding='UTF8')
   return df

 import pandas as pd
 import numpy as np
 import glob
+from pathlib import Path
 import huggingface_hub
 print("hfh", huggingface_hub.__version__)
 from huggingface_hub import hf_hub_download, upload_file, delete_file, snapshot_download, list_repo_files, dataset_info
     print(f"Org size: {grp_saved_df.shape[0]}, Mrg size: {new_grp_df.shape[0]}")
     store_group_sentences(filename, new_grp_df)
+def list_folders_sorted_by_date(path):
+  """List the names of the sub-folders of `path`, newest first.
+
+  Only entries for which `is_dir()` is true are considered; plain files
+  are skipped. Ordering uses each folder's modification time (`st_mtime`).
+
+  Args:
+    path: Directory to scan; anything accepted by `pathlib.Path`.
+
+  Returns:
+    A list of folder names (not full paths), most recently modified first.
+    The list is empty when `path` contains no sub-folders.
+  """
+  # Convert string path to a Path object
+  directory = Path(path)
+  # Get all folders in the given directory
+  folders = [f for f in directory.iterdir() if f.is_dir()]
+  # Sort folders by modification time, most recent first
+  sorted_folders = sorted(folders, key=lambda x: x.stat().st_mtime, reverse=True)
+  # Return folder names only; callers must re-join them with `path` themselves
+  return [folder.name for folder in sorted_folders]
 # https://huggingface.co/spaces/elonmuskceo/persistent-data/blob/main/app.py
 def get_sentence_csv(file_path: str):
     # file not found
     print(f"file not found, probably: {e}")
+  ds_local_path = os.path.join(LOCAL_DATA_DIRNAME,
+                               "datasets--AnimaLab--bias-test-gpt-sentences",
+                               "snapshots")
+  folders_sorted = list_folders_sorted_by_date(ds_local_path)
+  print("---SENTENCE FOLDERS---")
+  print(os.path.join(ds_local_path, folders_sorted[0]))
+  files=glob.glob(os.path.join(ds_local_path, folders_sorted[0], file_path), recursive=True)
   print("Files glob: "+', '.join(files))
+  df = pd.read_csv(os.path.join(ds_local_path, folders_sorted[0], file_path), encoding='UTF8')
+  #files=glob.glob(f"./{LOCAL_DATA_DIRNAME}/", recursive=True)
+  #print("Files glob: "+', '.join(files))
   #print("Save file:" + str(os.path.basename(file_path)))
+  #df = pd.read_csv(os.path.join(LOCAL_DATA_DIRNAME, os.path.basename(file_path)), encoding='UTF8')
   return df