Spaces:
Sleeping
Sleeping
File size: 1,591 Bytes
aee7a7e 46f34e6 087e7aa da1ca5a aee7a7e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import os
import glob
import uuid
import json
from pathlib import Path
import gradio as gr
from huggingface_hub import CommitScheduler, hf_hub_download, login
# Authenticate with the Hugging Face Hub; HF_TOKEN must be set in the
# Space secrets / environment for uploads to succeed.
HF_TOKEN = os.getenv("HF_TOKEN")
login(HF_TOKEN)
# Set up dataset storage
# Local staging directory; CommitScheduler pushes its contents to the Hub.
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)
# Function to get the latest dataset file
def get_latest_dataset_file(folder=None):
    """Return the most recently created ``data_*.jsonl`` file, if any.

    Args:
        folder: Directory to search. Defaults to the module-level
            ``dataset_folder`` when ``None`` (backward compatible).

    Returns:
        Path string of the newest matching file, or ``None`` when the
        folder contains no ``data_*.jsonl`` files.
    """
    search_dir = dataset_folder if folder is None else Path(folder)
    if files := glob.glob(str(search_dir / "data_*.jsonl")):
        # Pick the newest by creation time so new rows append to the
        # latest shard instead of starting a fresh file each run.
        return max(files, key=os.path.getctime)
    return None
# Check for existing dataset and create or append to it
# Reuse the newest existing shard when present; otherwise start a new
# uniquely-named file so concurrent Space restarts don't collide.
if latest_file := get_latest_dataset_file():
    dataset_file = Path(latest_file)
    print(f"Appending to existing dataset file: {dataset_file}")
else:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
# Set up CommitScheduler for dataset uploads
repo_id = "wannaphong/d1" # Replace with your desired dataset repo
# Background scheduler that periodically commits everything under
# dataset_folder to the Hub dataset repo (under "data/").
scheduler = CommitScheduler(
repo_id=repo_id,
repo_type="dataset",
folder_path=dataset_folder,
path_in_repo="data",
every=1, # Upload every 1 minute (previous comment saying "5 minutes" was stale)
)
# Function to save feedback and generated data
def save_data(name):
    """Append one JSON record ``{"name": name}`` to the dataset file.

    Args:
        name: User-provided string to record.

    The write is guarded by the scheduler's lock so it never races with
    the background commit/push to the Hub.
    """
    data = {
        "name": name,
    }
    with scheduler.lock:
        # Explicit UTF-8 avoids platform-default-encoding failures, and
        # ensure_ascii=False keeps non-ASCII names human-readable in the
        # JSONL file (still valid JSON for any reader).
        with dataset_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(data, ensure_ascii=False) + "\n")
# return "Data saved and will be uploaded to the dataset repository."
def greet(name):
    """Persist *name* to the dataset, then return a greeting string."""
    save_data(name)
    return f"Hello, {name}!"
# Minimal Gradio UI: one text input, one text output, backed by greet().
demo = gr.Interface(
fn=greet,
inputs=["text"],
outputs=["text"],
)
demo.launch()
|