d1 / app.py
wannaphong's picture
Update app.py
2dbe598 verified
import os
import glob
import uuid
import json
from pathlib import Path
import gradio as gr
from huggingface_hub import CommitScheduler, hf_hub_download, login
# Authenticate against the Hugging Face Hub with the token taken from the
# HF_TOKEN environment variable.
# NOTE(review): when HF_TOKEN is unset, None is handed to login() -- confirm
# that this is acceptable in the environment where the app runs.
HF_TOKEN = os.environ.get("HF_TOKEN")
login(token=HF_TOKEN)

# Set up dataset storage: the local folder is created on first start and
# reused afterwards.
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)
# Function to get the latest dataset file
def get_latest_dataset_file():
    """Return the path of the newest ``data_*.jsonl`` file in the dataset folder.

    "Newest" is decided by ``os.path.getctime``.

    Returns:
        The matching path as a string, or ``None`` when no file matches.
    """
    pattern = str(dataset_folder / "data_*.jsonl")
    candidates = glob.glob(pattern)
    if not candidates:
        return None
    return max(candidates, key=os.path.getctime)
# Pick the file that new records go to: start a fresh, uniquely named file
# when the folder holds no dataset file yet, otherwise keep appending to the
# newest existing one.
latest_file = get_latest_dataset_file()
if not latest_file:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
else:
    dataset_file = Path(latest_file)
    print(f"Appending to existing dataset file: {dataset_file}")
# Background scheduler that pushes the local dataset folder to the Hub.
repo_id = "wannaphong/d1"  # Replace with your desired dataset repo
scheduler = CommitScheduler(
    folder_path=dataset_folder,
    path_in_repo="data",
    repo_id=repo_id,
    repo_type="dataset",
    every=1,  # Interval between uploads, in minutes
)
# Function to save feedback and generated data
def save_data(name):
    """Append one JSON record holding ``name`` to the current dataset file.

    The record is written as a single line of JSON (JSON Lines format) while
    ``scheduler.lock`` is held.

    Args:
        name: Value stored under the ``"name"`` key; must be JSON-serializable.
    """
    record = {"name": name}
    line = json.dumps(record, ensure_ascii=False) + "\n"
    # The lock is held for the whole append -- presumably so a scheduled
    # upload never picks up a half-written line.
    with scheduler.lock:
        # ensure_ascii=False leaves non-ASCII characters unescaped, so the
        # file must be opened as UTF-8 explicitly; relying on the platform
        # default encoding can fail or produce non-UTF-8 output.
        with dataset_file.open("a", encoding="utf-8") as f:
            f.write(line)
def greet(name):
    """Record ``name`` via ``save_data`` and return a greeting for it.

    Args:
        name: The text to greet; it is stored before the greeting is built.

    Returns:
        The string ``"Hello, <name>!"``.
    """
    save_data(name)
    greeting = "".join(["Hello, ", name, "!"])
    return greeting
# Text-in / text-out Gradio interface around greet(), served immediately.
demo = gr.Interface(fn=greet, inputs=["text"], outputs=["text"])
demo.launch()