"""Gradio demo that records each submitted name in a Hub-synced JSONL dataset.

Every submission is appended as one JSON object per line to a file inside
``dataset/``; a ``CommitScheduler`` uploads that folder to a Hugging Face
dataset repository in the background.
"""

import glob
import json
import os
import uuid
from pathlib import Path
from typing import Optional

import gradio as gr
from huggingface_hub import CommitScheduler, hf_hub_download, login

HF_TOKEN = os.getenv("HF_TOKEN")
# NOTE(review): HF_TOKEN is None when the environment variable is unset;
# confirm that login() behaves acceptably in that case for your deployment.
login(HF_TOKEN)

# Set up dataset storage
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)


def get_latest_dataset_file() -> Optional[str]:
    """Return the path of the most recent ``data_*.jsonl`` file, if any.

    "Most recent" is decided by ``os.path.getctime``.

    Returns:
        The matching path as a string, or ``None`` when ``dataset_folder``
        contains no ``data_*.jsonl`` file.
    """
    if files := glob.glob(str(dataset_folder / "data_*.jsonl")):
        return max(files, key=os.path.getctime)
    return None


# Check for existing dataset and create or append to it
if latest_file := get_latest_dataset_file():
    dataset_file = Path(latest_file)
    print(f"Appending to existing dataset file: {dataset_file}")
else:
    # A random UUID in the file name keeps separate runs from colliding.
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")

# Set up CommitScheduler for dataset uploads
repo_id = "wannaphong/d1"  # Replace with your desired dataset repo
scheduler = CommitScheduler(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=dataset_folder,
    path_in_repo="data",
    every=1,  # Upload every minute (the interval is expressed in minutes)
)


def save_data(name: str) -> None:
    """Append one record for *name* to the local dataset file.

    The record is written as a single JSON line. The write happens while
    holding ``scheduler.lock``.

    Args:
        name: The value stored under the ``"name"`` key.
    """
    data = {
        "name": name,
    }
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly instead of relying on the platform default.
    with scheduler.lock, dataset_file.open("a", encoding="utf-8") as f:
        f.write(json.dumps(data, ensure_ascii=False) + "\n")


def greet(name: str) -> str:
    """Record *name* in the dataset and return a greeting for it.

    Args:
        name: Text entered in the Gradio interface.

    Returns:
        The greeting ``"Hello, <name>!"``.
    """
    save_data(name)
    return "Hello, " + name + "!"


demo = gr.Interface(
    fn=greet,
    inputs=["text"],
    outputs=["text"],
)

demo.launch()