File size: 1,591 Bytes
aee7a7e
46f34e6
087e7aa
da1ca5a
aee7a7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import glob
import uuid
import json
from pathlib import Path
import gradio as gr
from huggingface_hub import CommitScheduler, hf_hub_download, login

# Authenticate with the Hugging Face Hub using a token from the environment.
# Guard against a missing HF_TOKEN: login(None) would fail (or fall back to an
# interactive prompt) at import time, which is unusable in a deployed Space.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(HF_TOKEN)

# Set up local dataset storage; CommitScheduler will sync this folder to the Hub.
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)

# Function to get the latest dataset file
def get_latest_dataset_file():
    """Return the path of the most recently created data_*.jsonl file.

    Looks inside ``dataset_folder``; returns ``None`` when no matching
    file exists yet.
    """
    candidates = glob.glob(str(dataset_folder / "data_*.jsonl"))
    if not candidates:
        return None
    # Newest by filesystem creation time, matching append-to-latest semantics.
    return max(candidates, key=os.path.getctime)


# Check for existing dataset and create or append to it.
# Reusing the latest file keeps all submissions in one JSONL shard instead of
# creating a new file on every process restart.
if latest_file := get_latest_dataset_file():
    dataset_file = Path(latest_file)
    print(f"Appending to existing dataset file: {dataset_file}")
else:
    # First run (or empty folder): start a fresh shard with a unique name.
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
# Set up CommitScheduler for dataset uploads.
# CommitScheduler runs a background thread that periodically commits the
# contents of `folder_path` to the Hub dataset repo under `path_in_repo`.
repo_id = "wannaphong/d1"  # Replace with your desired dataset repo
scheduler = CommitScheduler(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=dataset_folder,
    path_in_repo="data",
    every=1,  # Upload every 1 minute
)
# Function to save feedback and generated data
def save_data(name):
    data = {
        "name": name,
    }
    with scheduler.lock:
        with dataset_file.open("a") as f:
            f.write(json.dumps(data) + "\n")
    # return "Data saved and will be uploaded to the dataset repository."



def greet(name):
    """Record the submitted name, then return a greeting string."""
    save_data(name)
    return f"Hello, {name}!"

# Wire the greet function to a minimal Gradio UI: one text input, one text output.
demo = gr.Interface(
    fn=greet,
    inputs=["text"],
    outputs=["text"],
)

# Start the Gradio server (blocks until the app is stopped).
demo.launch()