Spaces:
Sleeping
Sleeping
import os | |
import glob | |
import uuid | |
import json | |
from pathlib import Path | |
import gradio as gr | |
from huggingface_hub import CommitScheduler, hf_hub_download, login | |
# Authenticate with the Hugging Face Hub using the token from the environment.
# NOTE(review): when HF_TOKEN is unset, login() receives None — confirm how
# huggingface_hub behaves in that case on a non-interactive host.
HF_TOKEN = os.environ.get("HF_TOKEN")
login(token=HF_TOKEN)

# Local folder used for dataset storage; created if it does not exist yet.
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)
def get_latest_dataset_file():
    """Return the newest ``data_*.jsonl`` file found in ``dataset_folder``.

    Candidates are ranked by their ``os.path.getctime`` timestamp.

    Returns:
        The path (as a string) of the matching file with the greatest
        timestamp, or ``None`` when no file matches the pattern.
    """
    pattern = str(dataset_folder / "data_*.jsonl")
    candidates = glob.glob(pattern)
    if not candidates:
        return None
    return max(candidates, key=os.path.getctime)
# Reuse the newest dataset file already present in the folder, if any;
# otherwise start a new file with a random UUID in its name.
latest_file = get_latest_dataset_file()
if not latest_file:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
else:
    dataset_file = Path(latest_file)
    print(f"Appending to existing dataset file: {dataset_file}")
# Background uploader: the CommitScheduler pushes the local dataset folder
# to the "data" directory of the dataset repository named below.
repo_id = "wannaphong/d1"  # Replace with your desired dataset repo
scheduler = CommitScheduler(
    folder_path=dataset_folder,
    path_in_repo="data",
    repo_id=repo_id,
    repo_type="dataset",
    every=1,  # minutes between uploads (CommitScheduler's default is 5)
)
def save_data(name):
    """Append one record for *name* to the current dataset file.

    The record is written as a single JSON line of the form
    ``{"name": <name>}`` at the end of ``dataset_file``.

    Args:
        name: Value stored under the ``"name"`` key; must be
            JSON-serializable.

    Returns:
        None.
    """
    record = {"name": name}
    # Serialize before taking the lock so the critical section stays short
    # and a serialization error never leaves a freshly created empty file.
    line = json.dumps(record, ensure_ascii=False) + "\n"
    # Write under the scheduler's lock, which huggingface_hub documents as
    # the way to avoid writing while a scheduled upload is in progress.
    with scheduler.lock:
        # ensure_ascii=False emits non-ASCII characters verbatim, so the
        # encoding is pinned to UTF-8 rather than left to the platform
        # default (which may be unable to encode them).
        with dataset_file.open("a", encoding="utf-8") as f:
            f.write(line)
def greet(name):
    """Record *name* through ``save_data`` and build a greeting for it.

    Args:
        name: Text to greet; it is concatenated with string literals, so a
            string is expected.

    Returns:
        The string ``"Hello, <name>!"``.
    """
    save_data(name)
    greeting = "Hello, " + name
    greeting = greeting + "!"
    return greeting
# Build the web UI: one text input is passed to greet() and its return value
# is shown in one text output; then start serving the interface.
demo = gr.Interface(fn=greet, inputs=["text"], outputs=["text"])
demo.launch()